
TOMOYO Linux Cross Reference
Linux/arch/s390/mm/gmap.c

Version: linux-6.12-rc7



// SPDX-License-Identifier: GPL-2.0
/*
 *  KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2020
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *               David Hildenbrand <david@redhat.com>
 *               Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
#include <asm/tlb.h>

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
        struct page *page;

        page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return NULL;
        __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
        return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
static struct gmap *gmap_alloc(unsigned long limit)
{
        struct gmap *gmap;
        struct page *page;
        unsigned long *table;
        unsigned long etype, atype;

        if (limit < _REGION3_SIZE) {
                limit = _REGION3_SIZE - 1;
                atype = _ASCE_TYPE_SEGMENT;
                etype = _SEGMENT_ENTRY_EMPTY;
        } else if (limit < _REGION2_SIZE) {
                limit = _REGION2_SIZE - 1;
                atype = _ASCE_TYPE_REGION3;
                etype = _REGION3_ENTRY_EMPTY;
        } else if (limit < _REGION1_SIZE) {
                limit = _REGION1_SIZE - 1;
                atype = _ASCE_TYPE_REGION2;
                etype = _REGION2_ENTRY_EMPTY;
        } else {
                limit = -1UL;
                atype = _ASCE_TYPE_REGION1;
                etype = _REGION1_ENTRY_EMPTY;
        }
        gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
        if (!gmap)
                goto out;
        INIT_LIST_HEAD(&gmap->crst_list);
        INIT_LIST_HEAD(&gmap->children);
        INIT_LIST_HEAD(&gmap->pt_list);
        INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
        INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
        INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
        spin_lock_init(&gmap->guest_table_lock);
        spin_lock_init(&gmap->shadow_lock);
        refcount_set(&gmap->ref_count, 1);
        page = gmap_alloc_crst();
        if (!page)
                goto out_free;
        page->index = 0;
        list_add(&page->lru, &gmap->crst_list);
        table = page_to_virt(page);
        crst_table_init(table, etype);
        gmap->table = table;
        gmap->asce = atype | _ASCE_TABLE_LENGTH |
                _ASCE_USER_BITS | __pa(table);
        gmap->asce_end = limit;
        return gmap;

out_free:
        kfree(gmap);
out:
        return NULL;
}

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
        struct gmap *gmap;
        unsigned long gmap_asce;

        gmap = gmap_alloc(limit);
        if (!gmap)
                return NULL;
        gmap->mm = mm;
        spin_lock(&mm->context.lock);
        list_add_rcu(&gmap->list, &mm->context.gmap_list);
        if (list_is_singular(&mm->context.gmap_list))
                gmap_asce = gmap->asce;
        else
                gmap_asce = -1UL;
        WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
        spin_unlock(&mm->context.lock);
        return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);
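
/*
 * Editor's illustrative sketch, not part of gmap.c: the typical lifecycle of
 * a gmap as a KVM-like caller would use it. The example_* names and the 1 TB
 * limit are assumptions for illustration; the real callers live in
 * arch/s390/kvm.
 */
static int example_vm_init(struct mm_struct *mm, struct gmap **gmapp)
{
        struct gmap *gmap;

        /* One gmap per virtual machine, sized by the guest memory limit. */
        gmap = gmap_create(mm, 1UL << 40);      /* hypothetical 1 TB limit */
        if (!gmap)
                return -ENOMEM;
        *gmapp = gmap;
        return 0;
}

static void example_vm_destroy(struct gmap *gmap)
{
        /* Unlinks the gmap from mm->context.gmap_list and drops the last reference. */
        gmap_remove(gmap);
}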

static void gmap_flush_tlb(struct gmap *gmap)
{
        if (MACHINE_HAS_IDTE)
                __tlb_flush_idte(gmap->asce);
        else
                __tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
        struct radix_tree_iter iter;
        unsigned long indices[16];
        unsigned long index;
        void __rcu **slot;
        int i, nr;

        /* A radix tree is freed by deleting all of its entries */
        index = 0;
        do {
                nr = 0;
                radix_tree_for_each_slot(slot, root, &iter, index) {
                        indices[nr] = iter.index;
                        if (++nr == 16)
                                break;
                }
                for (i = 0; i < nr; i++) {
                        index = indices[i];
                        radix_tree_delete(root, index);
                }
        } while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
        struct gmap_rmap *rmap, *rnext, *head;
        struct radix_tree_iter iter;
        unsigned long indices[16];
        unsigned long index;
        void __rcu **slot;
        int i, nr;

        /* A radix tree is freed by deleting all of its entries */
        index = 0;
        do {
                nr = 0;
                radix_tree_for_each_slot(slot, root, &iter, index) {
                        indices[nr] = iter.index;
                        if (++nr == 16)
                                break;
                }
                for (i = 0; i < nr; i++) {
                        index = indices[i];
                        head = radix_tree_delete(root, index);
                        gmap_for_each_rmap_safe(rmap, rnext, head)
                                kfree(rmap);
                }
        } while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
static void gmap_free(struct gmap *gmap)
{
        struct page *page, *next;

        /* Flush tlb of all gmaps (if not already done for shadows) */
        if (!(gmap_is_shadow(gmap) && gmap->removed))
                gmap_flush_tlb(gmap);
        /* Free all segment & region tables. */
        list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
                __free_pages(page, CRST_ALLOC_ORDER);
        gmap_radix_tree_free(&gmap->guest_to_host);
        gmap_radix_tree_free(&gmap->host_to_guest);

        /* Free additional data for a shadow gmap */
        if (gmap_is_shadow(gmap)) {
                struct ptdesc *ptdesc, *n;

                /* Free all page tables. */
                list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
                        page_table_free_pgste(ptdesc);
                gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
                /* Release reference to the parent */
                gmap_put(gmap->parent);
        }

        kfree(gmap);
}

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
        refcount_inc(&gmap->ref_count);
        return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
        if (refcount_dec_and_test(&gmap->ref_count))
                gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
        struct gmap *sg, *next;
        unsigned long gmap_asce;

        /* Remove all shadow gmaps linked to this gmap */
        if (!list_empty(&gmap->children)) {
                spin_lock(&gmap->shadow_lock);
                list_for_each_entry_safe(sg, next, &gmap->children, list) {
                        list_del(&sg->list);
                        gmap_put(sg);
                }
                spin_unlock(&gmap->shadow_lock);
        }
        /* Remove gmap from the pre-mm list */
        spin_lock(&gmap->mm->context.lock);
        list_del_rcu(&gmap->list);
        if (list_empty(&gmap->mm->context.gmap_list))
                gmap_asce = 0;
        else if (list_is_singular(&gmap->mm->context.gmap_list))
                gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
                                             struct gmap, list)->asce;
        else
                gmap_asce = -1UL;
        WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
        spin_unlock(&gmap->mm->context.lock);
        synchronize_rcu();
        /* Put reference */
        gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
        get_lowcore()->gmap = (unsigned long)gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
        get_lowcore()->gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/**
 * gmap_get_enabled - get a pointer to the currently enabled gmap
 *
 * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
 */
struct gmap *gmap_get_enabled(void)
{
        return (struct gmap *)get_lowcore()->gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);
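
/*
 * Editor's illustrative sketch, not part of gmap.c: gmap_enable() is called
 * before entering guest context and gmap_disable() after leaving it, so that
 * the lowcore always names the gmap that fault handling should resolve
 * against. example_run_vcpu() is a hypothetical name.
 */
static void example_run_vcpu(struct gmap *gmap)
{
        gmap_enable(gmap);
        /* ... enter SIE and run the guest here ... */
        WARN_ON(gmap_get_enabled() != gmap);
        gmap_disable(gmap);
}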

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
                            unsigned long init, unsigned long gaddr)
{
        struct page *page;
        unsigned long *new;

        /* since we don't free the gmap table until gmap_free we can unlock */
        page = gmap_alloc_crst();
        if (!page)
                return -ENOMEM;
        new = page_to_virt(page);
        crst_table_init(new, init);
        spin_lock(&gmap->guest_table_lock);
        if (*table & _REGION_ENTRY_INVALID) {
                list_add(&page->lru, &gmap->crst_list);
                *table = __pa(new) | _REGION_ENTRY_LENGTH |
                        (*table & _REGION_ENTRY_TYPE_MASK);
                page->index = gaddr;
                page = NULL;
        }
        spin_unlock(&gmap->guest_table_lock);
        if (page)
                __free_pages(page, CRST_ALLOC_ORDER);
        return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
        struct page *page;
        unsigned long offset;

        offset = (unsigned long) entry / sizeof(unsigned long);
        offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
        page = pmd_pgtable_page((pmd_t *) entry);
        return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
        unsigned long *entry;
        int flush = 0;

        BUG_ON(gmap_is_shadow(gmap));
        spin_lock(&gmap->guest_table_lock);
        entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
        if (entry) {
                flush = (*entry != _SEGMENT_ENTRY_EMPTY);
                *entry = _SEGMENT_ENTRY_EMPTY;
        }
        spin_unlock(&gmap->guest_table_lock);
        return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;

        vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
                                                   gaddr >> PMD_SHIFT);
        return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
        unsigned long off;
        int flush;

        BUG_ON(gmap_is_shadow(gmap));
        if ((to | len) & (PMD_SIZE - 1))
                return -EINVAL;
        if (len == 0 || to + len < to)
                return -EINVAL;

        flush = 0;
        mmap_write_lock(gmap->mm);
        for (off = 0; off < len; off += PMD_SIZE)
                flush |= __gmap_unmap_by_gaddr(gmap, to + off);
        mmap_write_unlock(gmap->mm);
        if (flush)
                gmap_flush_tlb(gmap);
        return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
                     unsigned long to, unsigned long len)
{
        unsigned long off;
        int flush;

        BUG_ON(gmap_is_shadow(gmap));
        if ((from | to | len) & (PMD_SIZE - 1))
                return -EINVAL;
        if (len == 0 || from + len < from || to + len < to ||
            from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
                return -EINVAL;

        flush = 0;
        mmap_write_lock(gmap->mm);
        for (off = 0; off < len; off += PMD_SIZE) {
                /* Remove old translation */
                flush |= __gmap_unmap_by_gaddr(gmap, to + off);
                /* Store new translation */
                if (radix_tree_insert(&gmap->guest_to_host,
                                      (to + off) >> PMD_SHIFT,
                                      (void *) from + off))
                        break;
        }
        mmap_write_unlock(gmap->mm);
        if (flush)
                gmap_flush_tlb(gmap);
        if (off >= len)
                return 0;
        gmap_unmap_segment(gmap, to, len);
        return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
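
/*
 * Editor's illustrative sketch, not part of gmap.c: mirroring a PMD-aligned
 * chunk of host memory into the guest at guest address 0. Addresses and
 * lengths must be segment (PMD, 1 MB) aligned or both calls return -EINVAL.
 * example_mirror() is a hypothetical name.
 */
static int example_mirror(struct gmap *gmap, unsigned long host_start)
{
        int rc;

        rc = gmap_map_segment(gmap, host_start, 0, 16 * PMD_SIZE);
        if (rc)
                return rc;      /* -EINVAL on misalignment, -ENOMEM otherwise */
        /* ... run the guest against the new mapping ... */
        return gmap_unmap_segment(gmap, 0, 16 * PMD_SIZE);
}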

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;

        vmaddr = (unsigned long)
                radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
        /* Note: guest_to_host is empty for a shadow gmap */
        return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long rc;

        mmap_read_lock(gmap->mm);
        rc = __gmap_translate(gmap, gaddr);
        mmap_read_unlock(gmap->mm);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
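
/*
 * Editor's illustrative sketch, not part of gmap.c: the translation result
 * is either a host address or a negative errno encoded in the same unsigned
 * long, so callers test it with IS_ERR_VALUE() before use.
 */
static long example_translate(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;

        vmaddr = gmap_translate(gmap, gaddr);
        if (IS_ERR_VALUE(vmaddr))
                return (long)vmaddr;    /* -EFAULT: no segment mapped here */
        pr_debug("guest %lx -> host %lx\n", gaddr, vmaddr);
        return 0;
}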

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
                 unsigned long vmaddr)
{
        struct gmap *gmap;
        int flush;

        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
                if (flush)
                        gmap_flush_tlb(gmap);
        }
        rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
                           unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
        struct mm_struct *mm;
        unsigned long *table;
        spinlock_t *ptl;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        u64 unprot;
        int rc;

        BUG_ON(gmap_is_shadow(gmap));
        /* Create higher level tables in the gmap page table */
        table = gmap->table;
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
                table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
                                     gaddr & _REGION1_MASK))
                        return -ENOMEM;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
        }
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
                table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
                                     gaddr & _REGION2_MASK))
                        return -ENOMEM;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
        }
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
                table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
                                     gaddr & _REGION3_MASK))
                        return -ENOMEM;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
        }
        table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
        /* Walk the parent mm page table */
        mm = gmap->mm;
        pgd = pgd_offset(mm, vmaddr);
        VM_BUG_ON(pgd_none(*pgd));
        p4d = p4d_offset(pgd, vmaddr);
        VM_BUG_ON(p4d_none(*p4d));
        pud = pud_offset(p4d, vmaddr);
        VM_BUG_ON(pud_none(*pud));
        /* large puds cannot yet be handled */
        if (pud_leaf(*pud))
                return -EFAULT;
        pmd = pmd_offset(pud, vmaddr);
        VM_BUG_ON(pmd_none(*pmd));
        /* Are we allowed to use huge pages? */
        if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
                return -EFAULT;
        /* Link gmap segment table entry location to page table. */
        rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
        if (rc)
                return rc;
        ptl = pmd_lock(mm, pmd);
        spin_lock(&gmap->guest_table_lock);
        if (*table == _SEGMENT_ENTRY_EMPTY) {
                rc = radix_tree_insert(&gmap->host_to_guest,
                                       vmaddr >> PMD_SHIFT, table);
                if (!rc) {
                        if (pmd_leaf(*pmd)) {
                                *table = (pmd_val(*pmd) &
                                          _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
                                        | _SEGMENT_ENTRY_GMAP_UC;
                        } else
                                *table = pmd_val(*pmd) &
                                        _SEGMENT_ENTRY_HARDWARE_BITS;
                }
        } else if (*table & _SEGMENT_ENTRY_PROTECT &&
                   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
                unprot = (u64)*table;
                unprot &= ~_SEGMENT_ENTRY_PROTECT;
                unprot |= _SEGMENT_ENTRY_GMAP_UC;
                gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
        }
        spin_unlock(&gmap->guest_table_lock);
        spin_unlock(ptl);
        radix_tree_preload_end();
        return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
               unsigned int fault_flags)
{
        unsigned long vmaddr;
        int rc;
        bool unlocked;

        mmap_read_lock(gmap->mm);

retry:
        unlocked = false;
        vmaddr = __gmap_translate(gmap, gaddr);
        if (IS_ERR_VALUE(vmaddr)) {
                rc = vmaddr;
                goto out_up;
        }
        if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
                             &unlocked)) {
                rc = -EFAULT;
                goto out_up;
        }
        /*
         * In case fixup_user_fault() unlocked the mmap_lock during fault-in,
         * redo __gmap_translate() to avoid racing with map/unmap_segment.
         */
        if (unlocked)
                goto retry;

        rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
        mmap_read_unlock(gmap->mm);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
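
/*
 * Editor's illustrative sketch, not part of gmap.c: how a guest page fault
 * intercept could be resolved. FAULT_FLAG_WRITE requests a writable host
 * mapping; example_handle_fault() is a hypothetical name.
 */
static int example_handle_fault(struct gmap *gmap, unsigned long gaddr,
                                bool is_write)
{
        /* Faults in the host page and links it into the gmap tables. */
        return gmap_fault(gmap, gaddr, is_write ? FAULT_FLAG_WRITE : 0);
}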

/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
        struct vm_area_struct *vma;
        unsigned long vmaddr;
        spinlock_t *ptl;
        pte_t *ptep;

        /* Find the vm address for the guest address */
        vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
                                                   gaddr >> PMD_SHIFT);
        if (vmaddr) {
                vmaddr |= gaddr & ~PMD_MASK;

                vma = vma_lookup(gmap->mm, vmaddr);
                if (!vma || is_vm_hugetlb_page(vma))
                        return;

                /* Get pointer to the page table entry */
                ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
                if (likely(ptep)) {
                        ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
                        pte_unmap_unlock(ptep, ptl);
                }
        }
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
        unsigned long gaddr, vmaddr, size;
        struct vm_area_struct *vma;

        mmap_read_lock(gmap->mm);
        for (gaddr = from; gaddr < to;
             gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
                /* Find the vm address for the guest address */
                vmaddr = (unsigned long)
                        radix_tree_lookup(&gmap->guest_to_host,
                                          gaddr >> PMD_SHIFT);
                if (!vmaddr)
                        continue;
                vmaddr |= gaddr & ~PMD_MASK;
                /* Find vma in the parent mm */
                vma = find_vma(gmap->mm, vmaddr);
                if (!vma)
                        continue;
                /*
                 * We do not discard pages that are backed by
                 * hugetlbfs, so we don't have to refault them.
                 */
                if (is_vm_hugetlb_page(vma))
                        continue;
                size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
                zap_page_range_single(vma, vmaddr, size, NULL);
        }
        mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
        spin_lock(&gmap_notifier_lock);
        list_add_rcu(&nb->list, &gmap_notifier_list);
        spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
        spin_lock(&gmap_notifier_lock);
        list_del_rcu(&nb->list);
        spin_unlock(&gmap_notifier_lock);
        synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
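
/*
 * Editor's illustrative sketch, not part of gmap.c: a pte notifier as a
 * KVM-like user would register one. The callback runs when a protected guest
 * range is invalidated; example_notifier_call() and example_gmap_notifier
 * are hypothetical names.
 */
static void example_notifier_call(struct gmap *gmap, unsigned long start,
                                  unsigned long end)
{
        /* React to the invalidation, e.g. kick VCPUs out of guest context. */
}

static struct gmap_notifier example_gmap_notifier = {
        .notifier_call = example_notifier_call,
};

static void example_register(void)
{
        gmap_register_pte_notifier(&example_gmap_notifier);
}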

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
                               unsigned long end)
{
        struct gmap_notifier *nb;

        list_for_each_entry(nb, &gmap_notifier_list, list)
                nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
                                             unsigned long gaddr, int level)
{
        const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
        unsigned long *table = gmap->table;

        if (gmap_is_shadow(gmap) && gmap->removed)
                return NULL;

        if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
                return NULL;

        if (asce_type != _ASCE_TYPE_REGION1 &&
            gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
                return NULL;

        switch (asce_type) {
        case _ASCE_TYPE_REGION1:
                table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
                if (level == 4)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION2:
                table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
                if (level == 3)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION3:
                table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
                if (level == 2)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_SEGMENT:
                table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
                if (level == 1)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
                table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
        }
        return table;
}
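
/*
 * Editor's illustrative sketch, not part of gmap.c: reading the segment
 * (level 1) entry for a guest address with the walker above. The helper is
 * hypothetical; the real in-file callers are gmap_pte_op_walk() and
 * gmap_pmd_op_walk() below.
 */
static inline bool example_segment_is_valid(struct gmap *gmap,
                                            unsigned long gaddr)
{
        unsigned long *entry;

        entry = gmap_table_walk(gmap, gaddr, 1);        /* segment table entry */
        return entry && !(*entry & _SEGMENT_ENTRY_INVALID);
}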
858                                                   
859 /**                                               
860  * gmap_pte_op_walk - walk the gmap page table    
861  *                    and return the pte point    
862  * @gmap: pointer to guest mapping meta data s    
863  * @gaddr: virtual address in the guest addres    
864  * @ptl: pointer to the spinlock pointer          
865  *                                                
866  * Returns a pointer to the locked pte for a g    
867  */                                               
868 static pte_t *gmap_pte_op_walk(struct gmap *gm    
869                                spinlock_t **pt    
870 {                                                 
871         unsigned long *table;                     
872                                                   
873         BUG_ON(gmap_is_shadow(gmap));             
874         /* Walk the gmap page table, lock and     
875         table = gmap_table_walk(gmap, gaddr, 1    
876         if (!table || *table & _SEGMENT_ENTRY_    
877                 return NULL;                      
878         return pte_alloc_map_lock(gmap->mm, (p    
879 }                                                 
880                                                   
881 /**                                               
882  * gmap_pte_op_fixup - force a page in and con    
883  * @gmap: pointer to guest mapping meta data s    
884  * @gaddr: virtual address in the guest addres    
885  * @vmaddr: address in the host process addres    
886  * @prot: indicates access rights: PROT_NONE,     
887  *                                                
888  * Returns 0 if the caller can retry __gmap_tr    
889  * -ENOMEM if out of memory and -EFAULT if any    
890  * up or connecting the gmap page table.          
891  */                                               
892 static int gmap_pte_op_fixup(struct gmap *gmap    
893                              unsigned long vma    
894 {                                                 
895         struct mm_struct *mm = gmap->mm;          
896         unsigned int fault_flags;                 
897         bool unlocked = false;                    
898                                                   
899         BUG_ON(gmap_is_shadow(gmap));             
900         fault_flags = (prot == PROT_WRITE) ? F    
901         if (fixup_user_fault(mm, vmaddr, fault    
902                 return -EFAULT;                   
903         if (unlocked)                             
904                 /* lost mmap_lock, caller has     
905                 return 0;                         
906         /* Connect the page tables */             
907         return __gmap_link(gmap, gaddr, vmaddr    
908 }                                                 
909                                                   
910 /**                                               
911  * gmap_pte_op_end - release the page table lo    
912  * @ptep: pointer to the locked pte               
913  * @ptl: pointer to the page table spinlock       
914  */                                               
915 static void gmap_pte_op_end(pte_t *ptep, spinl    
916 {                                                 
917         pte_unmap_unlock(ptep, ptl);              
918 }                                                 
919                                                   
920 /**                                               
921  * gmap_pmd_op_walk - walk the gmap tables, ge    
922  *                    and return the pmd point    
923  * @gmap: pointer to guest mapping meta data s    
924  * @gaddr: virtual address in the guest addres    
925  *                                                
926  * Returns a pointer to the pmd for a guest ad    
927  */                                               
928 static inline pmd_t *gmap_pmd_op_walk(struct g    
929 {                                                 
930         pmd_t *pmdp;                              
931                                                   
932         BUG_ON(gmap_is_shadow(gmap));             
933         pmdp = (pmd_t *) gmap_table_walk(gmap,    
934         if (!pmdp)                                
935                 return NULL;                      
936                                                   
937         /* without huge pages, there is no nee    
938         if (!gmap->mm->context.allow_gmap_hpag    
939                 return pmd_none(*pmdp) ? NULL     
940                                                   
941         spin_lock(&gmap->guest_table_lock);       
942         if (pmd_none(*pmdp)) {                    
943                 spin_unlock(&gmap->guest_table    
944                 return NULL;                      
945         }                                         
946                                                   
947         /* 4k page table entries are locked vi    
948         if (!pmd_leaf(*pmdp))                     
949                 spin_unlock(&gmap->guest_table    
950         return pmdp;                              
951 }                                                 
952                                                   
953 /**                                               
954  * gmap_pmd_op_end - release the guest_table_l    
955  * @gmap: pointer to the guest mapping meta da    
956  * @pmdp: pointer to the pmd                      
957  */                                               
958 static inline void gmap_pmd_op_end(struct gmap    
959 {                                                 
960         if (pmd_leaf(*pmdp))                      
961                 spin_unlock(&gmap->guest_table    
962 }                                                 
963                                                   
964 /*                                                
965  * gmap_protect_pmd - remove access rights to     
966  * @pmdp: pointer to the pmd to be protected      
967  * @prot: indicates access rights: PROT_NONE,     
968  * @bits: notification bits to set                
969  *                                                
970  * Returns:                                       
971  * 0 if successfully protected                    
972  * -EAGAIN if a fixup is needed                   
973  * -EINVAL if unsupported notifier bits have b    
974  *                                                
975  * Expected to be called with sg->mm->mmap_loc    
976  * guest_table_lock held.                         
977  */                                               
978 static int gmap_protect_pmd(struct gmap *gmap,    
979                             pmd_t *pmdp, int p    
980 {                                                 
981         int pmd_i = pmd_val(*pmdp) & _SEGMENT_    
982         int pmd_p = pmd_val(*pmdp) & _SEGMENT_    
983         pmd_t new = *pmdp;                        
984                                                   
985         /* Fixup needed */                        
986         if ((pmd_i && (prot != PROT_NONE)) ||     
987                 return -EAGAIN;                   
988                                                   
989         if (prot == PROT_NONE && !pmd_i) {        
990                 new = set_pmd_bit(new, __pgpro    
991                 gmap_pmdp_xchg(gmap, pmdp, new    
992         }                                         
993                                                   
994         if (prot == PROT_READ && !pmd_p) {        
995                 new = clear_pmd_bit(new, __pgp    
996                 new = set_pmd_bit(new, __pgpro    
997                 gmap_pmdp_xchg(gmap, pmdp, new    
998         }                                         
999                                                   
1000         if (bits & GMAP_NOTIFY_MPROT)            
1001                 set_pmd(pmdp, set_pmd_bit(*pm    
1002                                                  
1003         /* Shadow GMAP protection needs split    
1004         if (bits & GMAP_NOTIFY_SHADOW)           
1005                 return -EINVAL;                  
1006                                                  
1007         return 0;                                
1008 }                                                
1009                                                  
/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
                            pmd_t *pmdp, int prot, unsigned long bits)
{
        int rc;
        pte_t *ptep;
        spinlock_t *ptl;
        unsigned long pbits = 0;

        if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
                return -EAGAIN;

        ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
        if (!ptep)
                return -ENOMEM;

        pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
        pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
        /* Protect and unlock. */
        rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
        gmap_pte_op_end(ptep, ptl);
        return rc;
}

/*
 * gmap_protect_range - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
 *
 * Called with sg->mm->mmap_lock in read.
 */
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
                              unsigned long len, int prot, unsigned long bits)
{
        unsigned long vmaddr, dist;
        pmd_t *pmdp;
        int rc;

        BUG_ON(gmap_is_shadow(gmap));
        while (len) {
                rc = -EAGAIN;
                pmdp = gmap_pmd_op_walk(gmap, gaddr);
                if (pmdp) {
                        if (!pmd_leaf(*pmdp)) {
                                rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
                                                      bits);
                                if (!rc) {
                                        len -= PAGE_SIZE;
                                        gaddr += PAGE_SIZE;
                                }
                        } else {
                                rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
                                                      bits);
                                if (!rc) {
                                        dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
                                        len = len < dist ? 0 : len - dist;
                                        gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
                                }
                        }
                        gmap_pmd_op_end(gmap, pmdp);
                }
                if (rc) {
                        if (rc == -EINVAL)
                                return rc;

                        /* -EAGAIN, fixup of userspace mm and gmap */
                        vmaddr = __gmap_translate(gmap, gaddr);
                        if (IS_ERR_VALUE(vmaddr))
                                return vmaddr;
                        rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
                        if (rc)
                                return rc;
                }
        }
        return 0;
}

/**
 * gmap_mprotect_notify - change access rights for a range of ptes and
 *                        call the notifier if any pte changes again
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if for each page in the given range a gmap mapping exists,
 * the new access rights could be set and the notifier could be armed.
 * If the gmap mapping is missing for one or more pages -EFAULT is
 * returned. If no memory could be allocated -ENOMEM is returned.
 * This function establishes missing page table entries.
 */
int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
                         unsigned long len, int prot)
{
        int rc;

        if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
                return -EINVAL;
        if (!MACHINE_HAS_ESOP && prot == PROT_READ)
                return -EINVAL;
        mmap_read_lock(gmap->mm);
        rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
        mmap_read_unlock(gmap->mm);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
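
/*
 * Example (illustrative sketch, not part of the original file): a caller
 * such as KVM can write-protect a guest page range and arm the MPROT
 * notifier in one call; the wrapper name arm_prot_notifier() is
 * hypothetical:
 *
 *	static int arm_prot_notifier(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		// PROT_READ makes the page read-only; a registered
 *		// gmap_notifier fires on the next write access
 *		return gmap_mprotect_notify(gmap, gaddr & PAGE_MASK,
 *					    PAGE_SIZE, PROT_READ);
 *	}
 */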

/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *                   absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
        unsigned long address, vmaddr;
        spinlock_t *ptl;
        pte_t *ptep, pte;
        int rc;

        if (gmap_is_shadow(gmap))
                return -EINVAL;

        while (1) {
                rc = -EAGAIN;
                ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
                if (ptep) {
                        pte = *ptep;
                        if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
                                address = pte_val(pte) & PAGE_MASK;
                                address += gaddr & ~PAGE_MASK;
                                *val = *(unsigned long *)__va(address);
                                set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
                                /* Do *NOT* clear the _PAGE_INVALID bit! */
                                rc = 0;
                        }
                        gmap_pte_op_end(ptep, ptl);
                }
                if (!rc)
                        break;
                vmaddr = __gmap_translate(gmap, gaddr);
                if (IS_ERR_VALUE(vmaddr)) {
                        rc = vmaddr;
                        break;
                }
                rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
                if (rc)
                        break;
        }
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);
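
/*
 * Example (illustrative sketch, not part of the original file): reading a
 * doubleword from guest absolute storage without marking the page
 * referenced, e.g. while walking a guest DAT table; "gmap" must be a
 * parent gmap, since shadow gmaps are rejected with -EINVAL:
 *
 *	unsigned long entry;
 *	int rc;
 *
 *	rc = gmap_read_table(gmap, gaddr & ~7UL, &entry);
 *	if (rc)
 *		return rc;	// -EFAULT/-ENOMEM from the fixup path
 */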

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
                                    struct gmap_rmap *rmap)
{
        struct gmap_rmap *temp;
        void __rcu **slot;

        BUG_ON(!gmap_is_shadow(sg));
        slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
        if (slot) {
                rmap->next = radix_tree_deref_slot_protected(slot,
                                                        &sg->guest_table_lock);
                for (temp = rmap->next; temp; temp = temp->next) {
                        if (temp->raddr == rmap->raddr) {
                                kfree(rmap);
                                return;
                        }
                }
                radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
        } else {
                rmap->next = NULL;
                radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
                                  rmap);
        }
}

/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
                             unsigned long paddr, unsigned long len)
{
        struct gmap *parent;
        struct gmap_rmap *rmap;
        unsigned long vmaddr;
        spinlock_t *ptl;
        pte_t *ptep;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        parent = sg->parent;
        while (len) {
                vmaddr = __gmap_translate(parent, paddr);
                if (IS_ERR_VALUE(vmaddr))
                        return vmaddr;
                rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
                if (!rmap)
                        return -ENOMEM;
                rmap->raddr = raddr;
                rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc) {
                        kfree(rmap);
                        return rc;
                }
                rc = -EAGAIN;
                ptep = gmap_pte_op_walk(parent, paddr, &ptl);
                if (ptep) {
                        spin_lock(&sg->guest_table_lock);
                        rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
                                             PGSTE_VSIE_BIT);
                        if (!rc)
                                gmap_insert_rmap(sg, vmaddr, rmap);
                        spin_unlock(&sg->guest_table_lock);
                        gmap_pte_op_end(ptep, ptl);
                }
                radix_tree_preload_end();
                if (rc) {
                        kfree(rmap);
                        rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
                        if (rc)
                                return rc;
                        continue;
                }
                paddr += PAGE_SIZE;
                len -= PAGE_SIZE;
        }
        return 0;
}

#define _SHADOW_RMAP_MASK       0x7
#define _SHADOW_RMAP_REGION1    0x5
#define _SHADOW_RMAP_REGION2    0x4
#define _SHADOW_RMAP_REGION3    0x3
#define _SHADOW_RMAP_SEGMENT    0x2
#define _SHADOW_RMAP_PGTABLE    0x1
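
/*
 * The low bits of a shadow rmap address encode the table level that was
 * protected, e.g. raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1
 * in gmap_shadow_r2t() below. An illustrative decode (helper name
 * hypothetical, not part of the original file):
 *
 *	static inline int shadow_rmap_level(unsigned long raddr)
 *	{
 *		return raddr & _SHADOW_RMAP_MASK;  // e.g. _SHADOW_RMAP_PGTABLE
 *	}
 */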

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
        asm volatile(
                "       idte    %0,0,%1"
                : : "a" (asce), "a" (vaddr) : "cc");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
        unsigned long *table;

        BUG_ON(!gmap_is_shadow(sg));
        table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
        if (!table || *table & _PAGE_INVALID)
                return;
        gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
        ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *pgt)
{
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
                pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
        unsigned long *ste;
        phys_addr_t sto, pgt;
        struct ptdesc *ptdesc;

        BUG_ON(!gmap_is_shadow(sg));
        ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
        if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
        sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
        gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
        pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
        *ste = _SEGMENT_ENTRY_EMPTY;
        __gmap_unshadow_pgt(sg, raddr, __va(pgt));
        /* Free page table */
        ptdesc = page_ptdesc(phys_to_page(pgt));
        list_del(&ptdesc->pt_list);
        page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *sgt)
{
        struct ptdesc *ptdesc;
        phys_addr_t pgt;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
                if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
                        continue;
                pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
                sgt[i] = _SEGMENT_ENTRY_EMPTY;
                __gmap_unshadow_pgt(sg, raddr, __va(pgt));
                /* Free page table */
                ptdesc = page_ptdesc(phys_to_page(pgt));
                list_del(&ptdesc->pt_list);
                page_table_free_pgste(ptdesc);
        }
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
        unsigned long r3o, *r3e;
        phys_addr_t sgt;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
        if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
        r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
        gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
        sgt = *r3e & _REGION_ENTRY_ORIGIN;
        *r3e = _REGION3_ENTRY_EMPTY;
        __gmap_unshadow_sgt(sg, raddr, __va(sgt));
        /* Free segment table */
        page = phys_to_page(sgt);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r3t)
{
        struct page *page;
        phys_addr_t sgt;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
                if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
                r3t[i] = _REGION3_ENTRY_EMPTY;
                __gmap_unshadow_sgt(sg, raddr, __va(sgt));
                /* Free segment table */
                page = phys_to_page(sgt);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
        unsigned long r2o, *r2e;
        phys_addr_t r3t;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
        if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
        r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
        gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
        r3t = *r2e & _REGION_ENTRY_ORIGIN;
        *r2e = _REGION2_ENTRY_EMPTY;
        __gmap_unshadow_r3t(sg, raddr, __va(r3t));
        /* Free region 3 table */
        page = phys_to_page(r3t);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r2t)
{
        phys_addr_t r3t;
        struct page *page;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
                if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
                r2t[i] = _REGION2_ENTRY_EMPTY;
                __gmap_unshadow_r3t(sg, raddr, __va(r3t));
                /* Free region 3 table */
                page = phys_to_page(r3t);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
        unsigned long r1o, *r1e;
        struct page *page;
        phys_addr_t r2t;

        BUG_ON(!gmap_is_shadow(sg));
        r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
        if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
        r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
        gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
        r2t = *r1e & _REGION_ENTRY_ORIGIN;
        *r1e = _REGION1_ENTRY_EMPTY;
        __gmap_unshadow_r2t(sg, raddr, __va(r2t));
        /* Free region 2 table */
        page = phys_to_page(r2t);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r1t)
{
        unsigned long asce;
        struct page *page;
        phys_addr_t r2t;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        asce = __pa(r1t) | _ASCE_TYPE_REGION1;
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
                if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
                __gmap_unshadow_r2t(sg, raddr, __va(r2t));
                /* Clear entry and flush translation r1t -> r2t */
                gmap_idte_one(asce, raddr);
                r1t[i] = _REGION1_ENTRY_EMPTY;
                /* Free region 2 table */
                page = phys_to_page(r2t);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
static void gmap_unshadow(struct gmap *sg)
{
        unsigned long *table;

        BUG_ON(!gmap_is_shadow(sg));
        if (sg->removed)
                return;
        sg->removed = 1;
        gmap_call_notifier(sg, 0, -1UL);
        gmap_flush_tlb(sg);
        table = __va(sg->asce & _ASCE_ORIGIN);
        switch (sg->asce & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
                __gmap_unshadow_r1t(sg, 0, table);
                break;
        case _ASCE_TYPE_REGION2:
                __gmap_unshadow_r2t(sg, 0, table);
                break;
        case _ASCE_TYPE_REGION3:
                __gmap_unshadow_r3t(sg, 0, table);
                break;
        case _ASCE_TYPE_SEGMENT:
                __gmap_unshadow_sgt(sg, 0, table);
                break;
        }
}
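
/*
 * Note on the unshadow helpers above: in every variant the region or
 * segment entry is invalidated and its TLB entries are flushed via
 * gmap_idte_one() before the backing table page is freed, so a concurrent
 * translation can not walk into a table that is about to be released.
 */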

/**
 * gmap_find_shadow - find a specific asce in the list of shadow tables
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns the pointer to a gmap if a shadow table with the given asce is
 * already available, ERR_PTR(-EAGAIN) if another one is just being created,
 * otherwise NULL
 */
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
                                     int edat_level)
{
        struct gmap *sg;

        list_for_each_entry(sg, &parent->children, list) {
                if (sg->orig_asce != asce || sg->edat_level != edat_level ||
                    sg->removed)
                        continue;
                if (!sg->initialized)
                        return ERR_PTR(-EAGAIN);
                refcount_inc(&sg->ref_count);
                return sg;
        }
        return NULL;
}

/**
 * gmap_shadow_valid - check if a shadow guest address space matches the
 *                     given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, so the caller can continue using it. Returns 0 otherwise; the
 * caller has to request a new shadow gmap in this case.
 */
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
        if (sg->removed)
                return 0;
        return sg->orig_asce == asce && sg->edat_level == edat_level;
}
EXPORT_SYMBOL_GPL(gmap_shadow_valid);
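
/*
 * Example (illustrative sketch, not part of the original file): a VSIE-like
 * caller keeps using a cached shadow gmap only while it still matches the
 * guest's ASCE and edat level:
 *
 *	if (!gmap_shadow_valid(sg, asce, edat_level)) {
 *		sg = gmap_shadow(parent, asce, edat_level);
 *		if (IS_ERR(sg))
 *			return PTR_ERR(sg);	// -ENOMEM/-EAGAIN/-EFAULT
 *	}
 */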

/**
 * gmap_shadow - create/find a shadow guest address space
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * The pages of the top level page table referred by the asce parameter
 * will be set to read-only and marked in the PGSTEs of the kvm process.
 * The shadow table will be removed automatically on any change to the
 * PTE mapping for the source table.
 *
 * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
 * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
 * parent gmap table could not be protected.
 */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
                         int edat_level)
{
        struct gmap *sg, *new;
        unsigned long limit;
        int rc;

        BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
        BUG_ON(gmap_is_shadow(parent));
        spin_lock(&parent->shadow_lock);
        sg = gmap_find_shadow(parent, asce, edat_level);
        spin_unlock(&parent->shadow_lock);
        if (sg)
                return sg;
        /* Create a new shadow gmap */
        limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
        if (asce & _ASCE_REAL_SPACE)
                limit = -1UL;
        new = gmap_alloc(limit);
        if (!new)
                return ERR_PTR(-ENOMEM);
        new->mm = parent->mm;
        new->parent = gmap_get(parent);
        new->private = parent->private;
        new->orig_asce = asce;
        new->edat_level = edat_level;
        new->initialized = false;
        spin_lock(&parent->shadow_lock);
        /* Recheck if another CPU created the same shadow */
        sg = gmap_find_shadow(parent, asce, edat_level);
        if (sg) {
                spin_unlock(&parent->shadow_lock);
                gmap_free(new);
                return sg;
        }
        if (asce & _ASCE_REAL_SPACE) {
                /* only allow one real-space gmap shadow */
                list_for_each_entry(sg, &parent->children, list) {
                        if (sg->orig_asce & _ASCE_REAL_SPACE) {
                                spin_lock(&sg->guest_table_lock);
                                gmap_unshadow(sg);
                                spin_unlock(&sg->guest_table_lock);
                                list_del(&sg->list);
                                gmap_put(sg);
                                break;
                        }
                }
        }
        refcount_set(&new->ref_count, 2);
        list_add(&new->list, &parent->children);
        if (asce & _ASCE_REAL_SPACE) {
                /* nothing to protect, return right away */
                new->initialized = true;
                spin_unlock(&parent->shadow_lock);
                return new;
        }
        spin_unlock(&parent->shadow_lock);
        /* protect after insertion, so it will get properly invalidated */
        mmap_read_lock(parent->mm);
        rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
                                ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
                                PROT_READ, GMAP_NOTIFY_SHADOW);
        mmap_read_unlock(parent->mm);
        spin_lock(&parent->shadow_lock);
        new->initialized = true;
        if (rc) {
                list_del(&new->list);
                gmap_free(new);
                new = ERR_PTR(rc);
        }
        spin_unlock(&parent->shadow_lock);
        return new;
}
EXPORT_SYMBOL_GPL(gmap_shadow);
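
/*
 * Illustrative note (not part of the original file): gmap_shadow() returns
 * ERR_PTR(-EAGAIN) while another CPU is still initializing the same shadow,
 * so callers are expected to retry; a simple (hypothetical) retry loop:
 *
 *	do {
 *		sg = gmap_shadow(parent, asce, edat_level);
 *	} while (PTR_ERR_OR_ZERO(sg) == -EAGAIN);
 */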

/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *table;
        phys_addr_t s_r2t;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region second table */
        page = gmap_alloc_crst();
        if (!page)
                return -ENOMEM;
        page->index = r2t & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_r2t = page_to_phys(page);
        /* Install shadow region second table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = s_r2t | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= (r2t & _REGION_ENTRY_PROTECT);
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make r2t read-only in parent gmap */
        raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
        origin = r2t & _REGION_ENTRY_ORIGIN;
        offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 4);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_r2t(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *table;
        phys_addr_t s_r3t;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region third table */
        page = gmap_alloc_crst();
        if (!page)
                return -ENOMEM;
        page->index = r3t & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_r3t = page_to_phys(page);
        /* Install shadow region third table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = s_r3t | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= (r3t & _REGION_ENTRY_PROTECT);
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make r3t read-only in parent gmap */
        raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
        origin = r3t & _REGION_ENTRY_ORIGIN;
        offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 3);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_r3t(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *table;
        phys_addr_t s_sgt;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
        /* Allocate a shadow segment table */
        page = gmap_alloc_crst();
        if (!page)
                return -ENOMEM;
        page->index = sgt & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_sgt = page_to_phys(page);
        /* Install shadow segment table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = s_sgt | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= sgt & _REGION_ENTRY_PROTECT;
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make sgt read-only in parent gmap */
        raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
        origin = sgt & _REGION_ENTRY_ORIGIN;
        offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 2);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_sgt(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

/**
 * gmap_shadow_pgt_lookup - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
                           unsigned long *pgt, int *dat_protection,
                           int *fake)
{
        unsigned long *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
        if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
                /* Shadow page tables are full pages (pte+pgste) */
                page = pfn_to_page(*table >> PAGE_SHIFT);
                *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
                *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
                *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
                rc = 0;
        } else {
                rc = -EAGAIN;
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
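
/*
 * Example (illustrative sketch, not part of the original file): a shadow
 * fault handler tries the lookup first and only instantiates a new shadow
 * page table via gmap_shadow_pgt() below on -EAGAIN:
 *
 *	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
 *	if (rc == -EAGAIN) {
 *		// walk the guest's own DAT tables to obtain pgt/fake, then:
 *		rc = gmap_shadow_pgt(sg, saddr, pgt, fake);
 *	}
 */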
2044                                                  
/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
		    int fake)
{
	unsigned long raddr, origin;
	unsigned long *table;
	struct ptdesc *ptdesc;
	phys_addr_t s_pgt;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
	/* Allocate a shadow page table */
	ptdesc = page_table_alloc_pgste(sg->mm);
	if (!ptdesc)
		return -ENOMEM;
	ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
	if (fake)
		ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
	s_pgt = page_to_phys(ptdesc_page(ptdesc));
	/* Install shadow page table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
	list_add(&ptdesc->pt_list, &sg->pt_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_SEGMENT_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make pgt read-only in parent gmap page table (not the pgste) */
	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 1);
		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_SEGMENT_ENTRY_INVALID;
	} else {
		gmap_unshadow_pgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	page_table_free_pgste(ptdesc);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);

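/*
 * Editor's sketch (not part of gmap.c): the error contract above suggests
 * a retry pattern when instantiating a shadow page table; -EAGAIN signals
 * a race with (un)shadowing, -EFAULT a resolution failure in the parent.
 * example_make_shadow_pgt() and MAX_TRIES are assumptions for illustration.
 */
#if 0
static int example_make_shadow_pgt(struct gmap *sg, unsigned long saddr,
				   unsigned long pgt, int fake)
{
	int i, rc = -EAGAIN;

	for (i = 0; i < MAX_TRIES && rc == -EAGAIN; i++)
		rc = gmap_shadow_pgt(sg, saddr, pgt, fake);
	return rc;	/* 0, -ENOMEM, -EFAULT, or still -EAGAIN */
}
#endif
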
/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

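/*
 * Editor's sketch (not part of gmap.c): gmap_shadow_page() mirrors a single
 * parent pte into the shadow gmap. A caller such as a VSIE-style shadow
 * fault handler would first resolve the parent pte for the faulting address
 * and then mirror it. parent_pte_lookup() is a hypothetical stand-in for
 * that resolution step.
 */
#if 0
static int example_shadow_one_page(struct gmap *sg, unsigned long saddr)
{
	pte_t pte;

	if (parent_pte_lookup(sg->parent, saddr, &pte))	/* hypothetical */
		return -EFAULT;
	/* read-only parent ptes yield PROT_READ shadows, see prot above */
	return gmap_shadow_page(sg, saddr, pte);
}
#endif
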
/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the guest_table_lock
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	unsigned long *table;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (table)
			gaddr = __gmap_segment_gaddr(table) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!table)
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

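/*
 * Editor's sketch (not part of gmap.c): ptep_notify() fans the invalidation
 * out through gmap_call_notifier(). A consumer registers a struct
 * gmap_notifier (declared in asm/gmap.h) via gmap_register_pte_notifier(),
 * as KVM does; the callback body below is an assumption for illustration.
 */
#if 0
static void example_notifier_call(struct gmap *gmap, unsigned long start,
				  unsigned long end)
{
	/* react to the invalidation of guest addresses [start, end] */
}

static struct gmap_notifier example_notifier = {
	.notifier_call = example_notifier_call,
};

static void example_register(void)
{
	gmap_register_pte_notifier(&example_notifier);
}
#endif
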
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
						  vmaddr >> PMD_SHIFT);
		if (pmdp) {
			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			else
				__pmdp_csp(pmdp);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory, which did not change, is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);

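/*
 * Editor's sketch (not part of gmap.c): the bitmap covers the 256 pages
 * (_PAGE_ENTRIES) of one segment, i.e. 4 unsigned longs on 64 bit. A
 * dirty-log harvester might scan it as below; mark_guest_page_dirty()
 * is a hypothetical consumer.
 */
#if 0
static void example_harvest_segment(struct gmap *gmap, unsigned long gaddr,
				    unsigned long vmaddr)
{
	unsigned long bitmap[4] = { 0 };
	int i;

	/* caller holds the guest_table_lock, as documented above */
	gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
	for_each_set_bit(i, bitmap, _PAGE_ENTRIES)
		mark_guest_page_dirty(gaddr + i * PAGE_SIZE); /* hypothetical */
}
#endif
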
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry	= thp_split_walk_pmd_entry,
	.walk_lock	= PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * switch on pgstes for the userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

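/*
 * Editor's sketch (not part of gmap.c): s390_enable_sie() is the first
 * step a hypervisor takes for its own mm before creating a gmap; KVM
 * calls it during VM creation. The surrounding init function is assumed.
 */
#if 0
static int example_vm_init(void)
{
	if (s390_enable_sie())
		return -EINVAL;	/* mm uses 2K page tables, no pgstes possible */
	/* current->mm now allocates 4K page tables with pgstes, THP is off */
	return 0;
}
#endif
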
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/*
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table,
 * and a racing walk_page_range_vma() call to pte_offset_map_lock()
 * would fail, it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __s390_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

static int __s390_disable_cow_sharing(struct mm_struct *mm)
{
	int rc;

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __s390_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}

/*
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any existing ones.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int s390_disable_cow_sharing(void)
{
	int rc;

	mmap_write_lock(current->mm);
	rc = __s390_disable_cow_sharing(current->mm);
	mmap_write_unlock(current->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);

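/*
 * Editor's sketch (not part of gmap.c): protected (PV) guests and storage
 * keys both forbid COW sharing. A setup path would call this with no mm
 * locks held, since the function takes mmap_lock itself; the surrounding
 * function is an assumption.
 */
#if 0
static int example_prepare_pv(void)
{
	int rc;

	rc = s390_disable_cow_sharing();	/* unshares zeropages, disables KSM */
	if (rc)
		return rc;	/* e.g. -ENOMEM or -EFAULT from the page walk */
	return 0;
}
#endif
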
/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting a key to 256 pages.
 * We only hold the mm lock, which is a rwsem and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry		= __s390_enable_skey_hugetlb,
	.pte_entry		= __s390_enable_skey_pte,
	.pmd_entry		= __s390_enable_skey_pmd,
	.walk_lock		= PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = __s390_disable_cow_sharing(mm);
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

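/*
 * Editor's sketch (not part of gmap.c): enabling storage keys is lazy and
 * one-way per mm, and the function is idempotent. A key-instruction
 * intercept handler would do roughly the following; the intercept context
 * itself is assumed.
 */
#if 0
static int example_handle_skey_intercept(void)
{
	int rc;

	rc = s390_enable_skey();	/* no-op if keys are already enabled */
	if (rc)
		return rc;	/* COW sharing could not be disabled */
	/* retry the guest's key instruction, keys are now active */
	return 0;
}
#endif
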
/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry		= __s390_reset_cmma,
	.walk_lock		= PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

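/*
 * Editor's sketch (not part of gmap.c): a guest reset path would clear all
 * CMMA state in one call. Note that s390_reset_cmma() takes mmap_lock in
 * write mode itself, so the caller must not hold it; the surrounding reset
 * handler is an assumption.
 */
#if 0
static void example_vm_reset(struct mm_struct *mm)
{
	s390_reset_cmma(mm);	/* all pages are stable again afterwards */
}
#endif
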
#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry = s390_gather_pages,
	.walk_lock = PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);

/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

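/*
 * Editor's sketch (not part of gmap.c): the gather/destroy loop above is
 * typically reached through thin wrappers that fix the interruptible flag.
 * To the best of the editor's knowledge such wrappers live in asm/gmap.h;
 * the example_ names below are placeholders, not the real identifiers.
 */
#if 0
static inline void example_uv_destroy_range(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end)
{
	(void)__s390_uv_destroy_range(mm, start, end, false);
}

static inline int example_uv_destroy_range_interruptible(struct mm_struct *mm,
							 unsigned long start,
							 unsigned long end)
{
	return __s390_uv_destroy_range(mm, start, end, true);
}
#endif
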
/**
 * s390_unlist_old_asce - Remove the topmost level of page tables from the
 * list of page tables of the gmap.
 * @gmap: the gmap whose table is to be removed
 *
 * On s390x, KVM keeps a list of all pages containing the page tables of the
 * gmap (the CRST list). This list is used at tear down time to free all
 * pages that are now not needed anymore.
 *
 * This function removes the topmost page of the tree (the one pointed to by
 * the ASCE) from the CRST list.
 *
 * This means that it will not be freed when the VM is torn down, and needs
 * to be handled separately by the caller, unless a leak is actually
 * intended. Notice that this function will only remove the page from the
 * list, the page will still be used as a top level page table (and ASCE).
 */
void s390_unlist_old_asce(struct gmap *gmap)
{
	struct page *old;

	old = virt_to_page(gmap->table);
	spin_lock(&gmap->guest_table_lock);
	list_del(&old->lru);
	/*
	 * Sometimes the topmost page might need to be "removed" multiple
	 * times, for example if the VM is rebooted into secure mode several
	 * times concurrently, or if s390_replace_asce fails after calling
	 * s390_remove_old_asce and is attempted again later. In that case
	 * the old asce has been removed from the list, and therefore it
	 * will not be freed when the VM terminates, but the ASCE is still
	 * in use and still pointed to.
	 * A subsequent call to replace_asce will follow the pointer and try
	 * to remove the same page from the list again.
	 * Therefore it's necessary that the page of the ASCE has valid
	 * pointers, so list_del can work (and do nothing) without
	 * dereferencing stale or invalid pointers.
	 */
	INIT_LIST_HEAD(&old->lru);
	spin_unlock(&gmap->guest_table_lock);
}
EXPORT_SYMBOL_GPL(s390_unlist_old_asce);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is a SEGMENT type then this function will return -EINVAL;
 * replacing it would leave the pointers in the host_to_guest radix tree
 * pointing to the wrong pages, causing use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	s390_unlist_old_asce(gmap);

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	page->index = 0;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/*
	 * The caller has to deal with the old ASCE, but here we make sure
	 * the new one is properly added to the CRST list, so that
	 * it will be freed when the VM is torn down.
	 */
	spin_lock(&gmap->guest_table_lock);
	list_add(&page->lru, &gmap->crst_list);
	spin_unlock(&gmap->guest_table_lock);

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
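/*
 * Editor's sketch (not part of gmap.c): as the comments above stress, the
 * old top level table leaks unless the caller saves a pointer to it first.
 * A reboot-into-secure path might therefore look like this; what happens
 * to "old" after the save is an assumption.
 */
#if 0
static int example_reboot_replace_asce(struct gmap *gmap)
{
	struct page *old = virt_to_page(gmap->table);
	int rc;

	rc = s390_replace_asce(gmap);	/* also unlists the old ASCE page */
	if (rc)
		return rc;	/* -EINVAL for segment ASCEs, or -ENOMEM */
	/* free "old" later, once no CPU can still be using the old ASCE */
	return 0;
}
#endif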