// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks".  The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/kmemleak.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation.  It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * in the beginning of an allocated page are occupied by the z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which is the maximum number of free chunks in a z3fold page; there will
 * also be 63 (or 62, respectively) freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		(TOTAL_CHUNKS - ZHDR_CHUNKS)
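
/*
 * Worked example (an illustrative sketch, not used by the code): assuming a
 * 4 KiB page (PAGE_SHIFT == 12), CHUNK_SHIFT is 12 - 6 = 6, so CHUNK_SIZE is
 * 64 bytes and TOTAL_CHUNKS is 4096 >> 6 = 64.  ZHDR_CHUNKS is however many
 * 64-byte chunks struct z3fold_header rounds up to, and NCHUNKS is what is
 * left for payload, i.e. the "63 (or 62)" figure quoted above when the
 * header fits in one (or two) chunk(s).
 */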

#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
#define SLOTS_ALIGN	(0x40)

/*****************
 * Structures
*****************/
struct z3fold_pool;

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link */
	rwlock_t lock;
};
#define HANDLE_FLAG_MASK	(0x03)

/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	index of the first chunk occupied by the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 * @foreign_handles:	number of objects in this page whose handles live in
 *			another page's slots
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
	unsigned short foreign_handles:2;
};
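
/*
 * Illustrative in-page layout (a sketch under the 64-byte-chunk assumption
 * above, not a normative map): chunk 0 holds struct z3fold_header, the FIRST
 * buddy starts at ZHDR_SIZE_ALIGNED, the MIDDLE buddy starts at
 * start_middle << CHUNK_SHIFT, and the LAST buddy ends exactly at PAGE_SIZE,
 * i.e. it starts at PAGE_SIZE - (last_chunks << CHUNK_SHIFT):
 *
 *	| header | FIRST ... | free | MIDDLE ... | free | ... LAST |
 *	0      ZHDR_CHUNKS         start_middle              TOTAL_CHUNKS
 */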

/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head __percpu *unbuddied;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
};

/*
 * Internal z3fold page flags
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED, /* by either reclaim or free */
	PAGE_MIGRATED, /* page is migrated and soon to be released */
};

/*
 * handle flags, go under HANDLE_FLAG_MASK
 */
enum z3fold_handle_flags {
	HANDLES_NOFREE = 0,
};

/*
 * Forward declarations
 */
static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool);
static void compact_page_work(struct work_struct *w);

/*****************
 * Helpers
*****************/

/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
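
/*
 * For instance (a sketch, again assuming 64-byte chunks): a 300-byte
 * allocation needs size_to_chunks(300) = (300 + 63) >> 6 = 5 chunks, i.e.
 * sizes are always rounded up to a whole number of chunks.
 */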

static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots = kmem_cache_zalloc(pool->c_handle,
							      gfp);

	if (slots) {
		/* It will be freed separately in free_handle(). */
		kmemleak_not_leak(slots);
		slots->pool = (unsigned long)pool;
		rwlock_init(&slots->lock);
	}

	return slots;
}

static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}

static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}

/* return locked z3fold page if it's not headless */
static inline struct z3fold_header *get_z3fold_header(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int locked = 0;

	if (!(handle & (1 << PAGE_HEADLESS))) {
		slots = handle_to_slots(handle);
		do {
			unsigned long addr;

			read_lock(&slots->lock);
			addr = *(unsigned long *)handle;
			zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
			locked = z3fold_page_trylock(zhdr);
			read_unlock(&slots->lock);
			if (locked) {
				struct page *page = virt_to_page(zhdr);

				if (!test_bit(PAGE_MIGRATED, &page->private))
					break;
				z3fold_page_unlock(zhdr);
			}
			cpu_relax();
		} while (true);
	} else {
		zhdr = (struct z3fold_header *)(handle & PAGE_MASK);
	}

	return zhdr;
}

static inline void put_z3fold_header(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (!test_bit(PAGE_HEADLESS, &page->private))
		z3fold_page_unlock(zhdr);
}

static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	if (WARN_ON(*(unsigned long *)handle == 0))
		return;

	slots = handle_to_slots(handle);
	write_lock(&slots->lock);
	*(unsigned long *)handle = 0;

	if (test_bit(HANDLES_NOFREE, &slots->pool)) {
		write_unlock(&slots->lock);
		return; /* simple case, nothing else to do */
	}

	if (zhdr->slots != slots)
		zhdr->foreign_handles--;

	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	write_unlock(&slots->lock);

	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		if (zhdr->slots == slots)
			zhdr->slots = NULL;
		kmem_cache_free(pool->c_handle, slots);
	}
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots;

	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);
	clear_bit(PAGE_MIGRATED, &page->private);
	if (headless)
		return zhdr;

	slots = alloc_slots(pool, gfp);
	if (!slots)
		return NULL;

	memset(zhdr, 0, sizeof(*zhdr));
	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->cpu = -1;
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	__free_page(page);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}

/*
 * Encodes the handle of a particular buddy within a z3fold page.
 * Zhdr->page_lock should be held as this function accesses first_num
 * if bud != HEADLESS.
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	write_lock(&slots->lock);
	slots->slot[idx] = h;
	write_unlock(&slots->lock);
	return (unsigned long)&slots->slot[idx];
}

static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}
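
/*
 * Handle layout, by way of example (a sketch, not normative): for a
 * non-HEADLESS buddy the value returned above is the address of
 * slots->slot[idx], where idx = (bud + first_num) & BUDDY_MASK.  The slot
 * itself stores the z3fold_header address with idx in the low bits; for a
 * LAST buddy it additionally carries last_chunks << BUDDY_SHIFT, which is
 * what handle_to_chunks() below reads back.
 */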

/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 *  but that doesn't matter, because the masking will result in the
 *  correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}

static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	spin_unlock(&pool->lock);

	if (locked)
		z3fold_page_unlock(zhdr);

	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);

	atomic64_dec(&pool->pages_nr);
}

static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static inline int put_z3fold_locked(struct z3fold_header *zhdr)
{
	return kref_put(&zhdr->refcount, release_z3fold_page_locked);
}

static inline int put_z3fold_locked_list(struct z3fold_header *zhdr)
{
	return kref_put(&zhdr->refcount, release_z3fold_page_locked_list);
}

static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;
	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}
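
/*
 * Example (a sketch, assuming TOTAL_CHUNKS = 64 as above): with
 * first_chunks = 10, start_middle = 20, middle_chunks = 5 and
 * last_chunks = 0, nfree_before is 0 (the first buddy is in use) and
 * nfree_after is 64 - (20 + 5) = 39, so num_free_chunks() returns 39.
 */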

/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied;
		int freechunks = num_free_chunks(zhdr);

		migrate_disable();
		unbuddied = this_cpu_ptr(pool->unbuddied);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		migrate_enable();
	}
}

static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
{
	enum buddy bud = HEADLESS;

	if (zhdr->middle_chunks) {
		if (!zhdr->first_chunks &&
		    chunks <= zhdr->start_middle - ZHDR_CHUNKS)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
	} else {
		if (!zhdr->first_chunks)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
		else
			bud = MIDDLE;
	}

	return bud;
}

static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

static inline bool buddy_single(struct z3fold_header *zhdr)
{
	return !((zhdr->first_chunks && zhdr->middle_chunks) ||
			(zhdr->first_chunks && zhdr->last_chunks) ||
			(zhdr->middle_chunks && zhdr->last_chunks));
}

static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	void *p = zhdr;
	unsigned long old_handle = 0;
	size_t sz = 0;
	struct z3fold_header *new_zhdr = NULL;
	int first_idx = __idx(zhdr, FIRST);
	int middle_idx = __idx(zhdr, MIDDLE);
	int last_idx = __idx(zhdr, LAST);
	unsigned short *moved_chunks = NULL;

	/*
	 * No need to protect slots here -- all the slots are "local" and
	 * the page lock is already taken
	 */
	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
		p += ZHDR_SIZE_ALIGNED;
		sz = zhdr->first_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
		moved_chunks = &zhdr->first_chunks;
	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
		p += zhdr->start_middle << CHUNK_SHIFT;
		sz = zhdr->middle_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
		moved_chunks = &zhdr->middle_chunks;
	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
		sz = zhdr->last_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
		moved_chunks = &zhdr->last_chunks;
	}

	if (sz > 0) {
		enum buddy new_bud = HEADLESS;
		short chunks = size_to_chunks(sz);
		void *q;

		new_zhdr = __z3fold_alloc(pool, sz, false);
		if (!new_zhdr)
			return NULL;

		if (WARN_ON(new_zhdr == zhdr))
			goto out_fail;

		new_bud = get_free_buddy(new_zhdr, chunks);
		q = new_zhdr;
		switch (new_bud) {
		case FIRST:
			new_zhdr->first_chunks = chunks;
			q += ZHDR_SIZE_ALIGNED;
			break;
		case MIDDLE:
			new_zhdr->middle_chunks = chunks;
			new_zhdr->start_middle =
				new_zhdr->first_chunks + ZHDR_CHUNKS;
			q += new_zhdr->start_middle << CHUNK_SHIFT;
			break;
		case LAST:
			new_zhdr->last_chunks = chunks;
			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
			break;
		default:
			goto out_fail;
		}
		new_zhdr->foreign_handles++;
		memcpy(q, p, sz);
		write_lock(&zhdr->slots->lock);
		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
			__idx(new_zhdr, new_bud);
		if (new_bud == LAST)
			*(unsigned long *)old_handle |=
					(new_zhdr->last_chunks << BUDDY_SHIFT);
		write_unlock(&zhdr->slots->lock);
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);

		*moved_chunks = 0;
	}

	return new_zhdr;

out_fail:
	if (new_zhdr && !put_z3fold_locked(new_zhdr)) {
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);
	}
	return NULL;
}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}
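
/*
 * Compaction example (a sketch): if only the middle buddy is in use it is
 * moved down to chunk ZHDR_CHUNKS and becomes the first buddy; first_num is
 * bumped so that existing handles, which encode (bud + first_num), keep
 * decoding to the right buddy in handle_to_buddy().  Otherwise the middle
 * object is only shifted towards its neighbour when that closes a gap of at
 * least BIG_CHUNK_GAP chunks, since the memmove() of the payload is
 * relatively expensive.
 */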

static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (put_z3fold_locked(zhdr))
		return;

	if (test_bit(PAGE_STALE, &page->private) ||
	    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}

	if (!zhdr->foreign_handles && buddy_single(zhdr) &&
	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
		if (!put_z3fold_locked(zhdr)) {
			clear_bit(PAGE_CLAIMED, &page->private);
			z3fold_page_unlock(zhdr);
		}
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	migrate_disable();
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = this_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
				struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
				struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private) ||
		    test_bit(PAGE_CLAIMED, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	migrate_enable();

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private) ||
			    test_bit(PAGE_CLAIMED, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	if (zhdr && !zhdr->slots) {
		zhdr->slots = alloc_slots(pool, GFP_ATOMIC);
		if (!zhdr->slots)
			goto out_fail;
	}
	return zhdr;

out_fail:
	if (!put_z3fold_locked(zhdr)) {
		add_to_unbuddied(pool, zhdr);
		z3fold_page_unlock(zhdr);
	}
	return NULL;
}

/*
 * API Functions
 */
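
/*
 * Illustrative call sequence (a sketch only -- these functions are static
 * and are normally reached through the zpool API mentioned in the header
 * comment rather than called directly; z3fold_unmap() is assumed here as
 * the counterpart of z3fold_map() documented further down):
 *
 *	struct z3fold_pool *pool = z3fold_create_pool("test", GFP_KERNEL);
 *	unsigned long handle;
 *
 *	if (pool && z3fold_alloc(pool, size, GFP_KERNEL, &handle) == 0) {
 *		void *obj = z3fold_map(pool, handle);
 *
 *		... use obj ...
 *		z3fold_unmap(pool, handle);
 *		z3fold_free(pool, handle);
 *	}
 *	if (pool)
 *		z3fold_destroy_pool(pool);
 */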

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS,
					 __alignof__(struct list_head));
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	INIT_WORK(&pool->work, free_pages_work);
	return pool;

out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	free_percpu(pool->unbuddied);
	kfree(pool);
}

static const struct movable_operations z3fold_mops;

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request.  A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			bud = get_free_buddy(zhdr, chunks);
			if (bud == HEADLESS) {
				if (!put_z3fold_locked(zhdr))
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = alloc_page(gfp);
	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	} else {
		WARN_ON(!trylock_page(page));
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}

/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is
 * claimed (e.g. under migration), as indicated by the PAGE_CLAIMED flag
 * being set, this function only sets the first|middle|last_chunks to 0.
 * The page is actually freed once all buddies are freed.
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;
	bool page_claimed;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);
	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is already claimed, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!page_claimed) {
			put_z3fold_header(zhdr);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		put_z3fold_header(zhdr);
		return;
	}

	if (!page_claimed)
		free_handle(handle, zhdr);
	if (put_z3fold_locked_list(zhdr))
		return;
	if (page_claimed) {
		/* the page has not been claimed by us */
		put_z3fold_header(zhdr);
		return;
	}
	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		clear_bit(PAGE_CLAIMED, &page->private);
		put_z3fold_header(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		clear_bit(PAGE_CLAIMED, &page->private);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	clear_bit(PAGE_CLAIMED, &page->private);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	put_z3fold_header(zhdr);
}
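
/*
 * Summary of the non-headless free path above: the freed buddy's chunk
 * count is zeroed and, unless the page had already been claimed by another
 * context, its handle is released.  If that was the last buddy, the final
 * reference is dropped in put_z3fold_locked_list() and the page is torn
 * down; otherwise the page is flagged NEEDS_COMPACTING and compaction is
 * queued on the owning CPU's workqueue, or performed synchronously via
 * do_compact_page() when that CPU is offline.
 */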

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
out:
	put_z3fold_header(zhdr);
	return addr;
}
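
/*
 * Resulting offsets from z3fold_map() above, relative to the start of the
 * z3fold page (informal summary):
 *
 *	HEADLESS: offset 0, the object occupies the whole page;
 *	FIRST:	  ZHDR_SIZE_ALIGNED, i.e. directly after the header;
 *	MIDDLE:	  start_middle << CHUNK_SHIFT, fixed at allocation time;
 *	LAST:	  PAGE_SIZE - (chunks << CHUNK_SHIFT), so the object ends
 *		  flush with the end of the page.
 */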

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	put_z3fold_header(zhdr);
}

/**
 * z3fold_get_pool_pages() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_pages(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}
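
/*
 * The movable_operations callbacks below migrate z3fold pages only while no
 * object in them is mapped and no foreign handles exist: both
 * z3fold_page_isolate() and z3fold_page_migrate() bail out if mapped_count
 * or foreign_handles is non-zero.  This is one reason z3fold_map() and
 * z3fold_unmap() calls must always be balanced.
 */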

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}
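
/*
 * z3fold_page_migrate() below copies the whole source page, header included,
 * into the destination page, then re-initializes the per-page state that
 * must not be shared (page lock, work item, buddy list), re-encodes a handle
 * for every occupied buddy so that existing handles point into the new page,
 * and queues compaction work for the new page.  The old page's private flags
 * (including PAGE_CLAIMED and PAGE_MIGRATED) are cleared before it is
 * released.
 */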

static int z3fold_page_migrate(struct page *newpage, struct page *page,
			       enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		clear_bit(PAGE_CLAIMED, &page->private);
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	set_bit(PAGE_MIGRATED, &page->private);
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	__ClearPageMovable(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	__SetPageMovable(newpage, &z3fold_mops);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	/* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */
	page->private = 0;
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (put_z3fold_locked(zhdr))
		return;
	if (list_empty(&zhdr->buddy))
		add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static const struct movable_operations z3fold_mops = {
	.isolate_page = z3fold_page_isolate,
	.migrate_page = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

static void *z3fold_zpool_create(const char *name, gfp_t gfp)
{
	return z3fold_create_pool(name, gfp);
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_pages(void *pool)
{
	return z3fold_get_pool_pages(pool);
}

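/*
 * The zpool_driver below plugs the wrappers above into the generic zpool API
 * from <linux/zpool.h>.  A minimal sketch of how a zpool user such as zswap
 * would end up exercising this driver (the zpool_* helpers are the generic
 * API, not defined in this file; names and error handling are illustrative
 * only):
 *
 *	struct zpool *zp = zpool_create_pool("z3fold", "mypool", GFP_KERNEL);
 *	unsigned long handle;
 *	void *dst;
 *
 *	zpool_malloc(zp, len, GFP_KERNEL, &handle);       -> z3fold_zpool_malloc()
 *	dst = zpool_map_handle(zp, handle, ZPOOL_MM_WO);  -> z3fold_zpool_map()
 *	memcpy(dst, src, len);
 *	zpool_unmap_handle(zp, handle);                   -> z3fold_zpool_unmap()
 *	...
 *	zpool_free(zp, handle);                           -> z3fold_zpool_free()
 *	zpool_destroy_pool(zp);
 */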
static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.sleep_mapped = true,
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_pages =	z3fold_zpool_total_pages,
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	/*
	 * Make sure the z3fold header is not larger than the page size and
	 * that there is space remaining for the buddies.
	 */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");