1 // SPDX-License-Identifier: GPL-2.0-only << 2 /* 1 /* 3 * z3fold.c 2 * z3fold.c 4 * 3 * 5 * Author: Vitaly Wool <vitaly.wool@konsulko.c 4 * Author: Vitaly Wool <vitaly.wool@konsulko.com> 6 * Copyright (C) 2016, Sony Mobile Communicati 5 * Copyright (C) 2016, Sony Mobile Communications Inc. 7 * 6 * 8 * This implementation is based on zbud writte 7 * This implementation is based on zbud written by Seth Jennings. 9 * 8 * 10 * z3fold is an special purpose allocator for 9 * z3fold is an special purpose allocator for storing compressed pages. It 11 * can store up to three compressed pages per 10 * can store up to three compressed pages per page which improves the 12 * compression ratio of zbud while retaining i 11 * compression ratio of zbud while retaining its main concepts (e. g. always 13 * storing an integral number of objects per p 12 * storing an integral number of objects per page) and simplicity. 14 * It still has simple and deterministic recla 13 * It still has simple and deterministic reclaim properties that make it 15 * preferable to a higher density approach (wi 14 * preferable to a higher density approach (with no requirement on integral 16 * number of object per page) when reclaim is 15 * number of object per page) when reclaim is used. 17 * 16 * 18 * As in zbud, pages are divided into "chunks" 17 * As in zbud, pages are divided into "chunks". The size of the chunks is 19 * fixed at compile time and is determined by 18 * fixed at compile time and is determined by NCHUNKS_ORDER below. 20 * 19 * 21 * z3fold doesn't export any API and is meant 20 * z3fold doesn't export any API and is meant to be used via zpool API. 22 */ 21 */ 23 22 24 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 25 24 26 #include <linux/atomic.h> 25 #include <linux/atomic.h> 27 #include <linux/sched.h> 26 #include <linux/sched.h> 28 #include <linux/cpumask.h> << 29 #include <linux/list.h> 27 #include <linux/list.h> 30 #include <linux/mm.h> 28 #include <linux/mm.h> 31 #include <linux/module.h> 29 #include <linux/module.h> 32 #include <linux/page-flags.h> << 33 #include <linux/migrate.h> << 34 #include <linux/node.h> << 35 #include <linux/compaction.h> << 36 #include <linux/percpu.h> 30 #include <linux/percpu.h> 37 #include <linux/preempt.h> 31 #include <linux/preempt.h> 38 #include <linux/workqueue.h> 32 #include <linux/workqueue.h> 39 #include <linux/slab.h> 33 #include <linux/slab.h> 40 #include <linux/spinlock.h> 34 #include <linux/spinlock.h> 41 #include <linux/zpool.h> 35 #include <linux/zpool.h> 42 #include <linux/kmemleak.h> << 43 << 44 /* << 45 * NCHUNKS_ORDER determines the internal alloc << 46 * adjusting internal fragmentation. It also << 47 * freelists maintained in each pool. NCHUNKS_ << 48 * allocation granularity will be in chunks of << 49 * in the beginning of an allocated page are o << 50 * NCHUNKS will be calculated to 63 (or 62 in << 51 * which shows the max number of free chunks i << 52 * be 63, or 62, respectively, freelists per p << 53 */ << 54 #define NCHUNKS_ORDER 6 << 55 << 56 #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ << 57 #define CHUNK_SIZE (1 << CHUNK_SHIFT) << 58 #define ZHDR_SIZE_ALIGNED round_up(sizeof(stru << 59 #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> << 60 #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SH << 61 #define NCHUNKS (TOTAL_CHUNKS - ZHDR_C << 62 << 63 #define BUDDY_MASK (0x3) << 64 #define BUDDY_SHIFT 2 << 65 #define SLOTS_ALIGN (0x40) << 66 36 67 /***************** 37 /***************** 68 * Structures 38 * Structures 69 *****************/ 39 *****************/ 70 struct z3fold_pool; 40 struct z3fold_pool; >> 41 struct z3fold_ops { >> 42 int (*evict)(struct z3fold_pool *pool, unsigned long handle); >> 43 }; 71 44 72 enum buddy { 45 enum buddy { 73 HEADLESS = 0, 46 HEADLESS = 0, 74 FIRST, 47 FIRST, 75 MIDDLE, 48 MIDDLE, 76 LAST, 49 LAST, 77 BUDDIES_MAX = LAST !! 50 BUDDIES_MAX 78 }; 51 }; 79 52 80 struct z3fold_buddy_slots { << 81 /* << 82 * we are using BUDDY_MASK in handle_t << 83 * be enough slots to hold all possibl << 84 */ << 85 unsigned long slot[BUDDY_MASK + 1]; << 86 unsigned long pool; /* back link */ << 87 rwlock_t lock; << 88 }; << 89 #define HANDLE_FLAG_MASK (0x03) << 90 << 91 /* 53 /* 92 * struct z3fold_header - z3fold page metadata 54 * struct z3fold_header - z3fold page metadata occupying first chunks of each 93 * z3fold page, except fo 55 * z3fold page, except for HEADLESS pages 94 * @buddy: links the z3fold page 56 * @buddy: links the z3fold page into the relevant list in the 95 * pool 57 * pool 96 * @page_lock: per-page lock 58 * @page_lock: per-page lock 97 * @refcount: reference count for th 59 * @refcount: reference count for the z3fold page 98 * @work: work_struct for page l 60 * @work: work_struct for page layout optimization 99 * @slots: pointer to the structu !! 61 * @pool: pointer to the pool which this page belongs to 100 * @pool: pointer to the contain << 101 * @cpu: CPU which this page "b 62 * @cpu: CPU which this page "belongs" to 102 * @first_chunks: the size of the first 63 * @first_chunks: the size of the first buddy in chunks, 0 if free 103 * @middle_chunks: the size of the middle 64 * @middle_chunks: the size of the middle buddy in chunks, 0 if free 104 * @last_chunks: the size of the last b 65 * @last_chunks: the size of the last buddy in chunks, 0 if free 105 * @first_num: the starting number (f 66 * @first_num: the starting number (for the first handle) 106 * @mapped_count: the number of objects << 107 */ 67 */ 108 struct z3fold_header { 68 struct z3fold_header { 109 struct list_head buddy; 69 struct list_head buddy; 110 spinlock_t page_lock; 70 spinlock_t page_lock; 111 struct kref refcount; 71 struct kref refcount; 112 struct work_struct work; 72 struct work_struct work; 113 struct z3fold_buddy_slots *slots; << 114 struct z3fold_pool *pool; 73 struct z3fold_pool *pool; 115 short cpu; 74 short cpu; 116 unsigned short first_chunks; 75 unsigned short first_chunks; 117 unsigned short middle_chunks; 76 unsigned short middle_chunks; 118 unsigned short last_chunks; 77 unsigned short last_chunks; 119 unsigned short start_middle; 78 unsigned short start_middle; 120 unsigned short first_num:2; 79 unsigned short first_num:2; 121 unsigned short mapped_count:2; << 122 unsigned short foreign_handles:2; << 123 }; 80 }; 124 81 >> 82 /* >> 83 * NCHUNKS_ORDER determines the internal allocation granularity, effectively >> 84 * adjusting internal fragmentation. It also determines the number of >> 85 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the >> 86 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks >> 87 * in the beginning of an allocated page are occupied by z3fold header, so >> 88 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), >> 89 * which shows the max number of free chunks in z3fold page, also there will >> 90 * be 63, or 62, respectively, freelists per pool. >> 91 */ >> 92 #define NCHUNKS_ORDER 6 >> 93 >> 94 #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) >> 95 #define CHUNK_SIZE (1 << CHUNK_SHIFT) >> 96 #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) >> 97 #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) >> 98 #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) >> 99 #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) >> 100 >> 101 #define BUDDY_MASK (0x3) >> 102 125 /** 103 /** 126 * struct z3fold_pool - stores metadata for ea 104 * struct z3fold_pool - stores metadata for each z3fold pool 127 * @name: pool name 105 * @name: pool name 128 * @lock: protects pool unbuddied lists !! 106 * @lock: protects pool unbuddied/lru lists 129 * @stale_lock: protects pool stale page list 107 * @stale_lock: protects pool stale page list 130 * @unbuddied: per-cpu array of lists trackin 108 * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 131 * buddies; the list each z3fold 109 * buddies; the list each z3fold page is added to depends on 132 * the size of its free region. 110 * the size of its free region. >> 111 * @lru: list tracking the z3fold pages in LRU order by most recently >> 112 * added buddy. 133 * @stale: list of pages marked for freei 113 * @stale: list of pages marked for freeing 134 * @pages_nr: number of z3fold pages in the 114 * @pages_nr: number of z3fold pages in the pool. 135 * @c_handle: cache for z3fold_buddy_slots a !! 115 * @ops: pointer to a structure of user defined operations specified at >> 116 * pool creation time. 136 * @compact_wq: workqueue for page layout back 117 * @compact_wq: workqueue for page layout background optimization 137 * @release_wq: workqueue for safe page releas 118 * @release_wq: workqueue for safe page release 138 * @work: work_struct for safe page rele 119 * @work: work_struct for safe page release 139 * 120 * 140 * This structure is allocated at pool creatio 121 * This structure is allocated at pool creation time and maintains metadata 141 * pertaining to a particular z3fold pool. 122 * pertaining to a particular z3fold pool. 142 */ 123 */ 143 struct z3fold_pool { 124 struct z3fold_pool { 144 const char *name; 125 const char *name; 145 spinlock_t lock; 126 spinlock_t lock; 146 spinlock_t stale_lock; 127 spinlock_t stale_lock; 147 struct list_head __percpu *unbuddied; !! 128 struct list_head *unbuddied; >> 129 struct list_head lru; 148 struct list_head stale; 130 struct list_head stale; 149 atomic64_t pages_nr; 131 atomic64_t pages_nr; 150 struct kmem_cache *c_handle; !! 132 const struct z3fold_ops *ops; >> 133 struct zpool *zpool; >> 134 const struct zpool_ops *zpool_ops; 151 struct workqueue_struct *compact_wq; 135 struct workqueue_struct *compact_wq; 152 struct workqueue_struct *release_wq; 136 struct workqueue_struct *release_wq; 153 struct work_struct work; 137 struct work_struct work; 154 }; 138 }; 155 139 156 /* 140 /* 157 * Internal z3fold page flags 141 * Internal z3fold page flags 158 */ 142 */ 159 enum z3fold_page_flags { 143 enum z3fold_page_flags { 160 PAGE_HEADLESS = 0, 144 PAGE_HEADLESS = 0, 161 MIDDLE_CHUNK_MAPPED, 145 MIDDLE_CHUNK_MAPPED, 162 NEEDS_COMPACTING, 146 NEEDS_COMPACTING, 163 PAGE_STALE, !! 147 PAGE_STALE 164 PAGE_CLAIMED, /* by either reclaim or << 165 PAGE_MIGRATED, /* page is migrated and << 166 }; 148 }; 167 149 168 /* << 169 * handle flags, go under HANDLE_FLAG_MASK << 170 */ << 171 enum z3fold_handle_flags { << 172 HANDLES_NOFREE = 0, << 173 }; << 174 << 175 /* << 176 * Forward declarations << 177 */ << 178 static struct z3fold_header *__z3fold_alloc(st << 179 static void compact_page_work(struct work_stru << 180 << 181 /***************** 150 /***************** 182 * Helpers 151 * Helpers 183 *****************/ 152 *****************/ 184 153 185 /* Converts an allocation size in bytes to siz 154 /* Converts an allocation size in bytes to size in z3fold chunks */ 186 static int size_to_chunks(size_t size) 155 static int size_to_chunks(size_t size) 187 { 156 { 188 return (size + CHUNK_SIZE - 1) >> CHUN 157 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 189 } 158 } 190 159 191 #define for_each_unbuddied_list(_iter, _begin) 160 #define for_each_unbuddied_list(_iter, _begin) \ 192 for ((_iter) = (_begin); (_iter) < NCH 161 for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 193 162 194 static inline struct z3fold_buddy_slots *alloc !! 163 static void compact_page_work(struct work_struct *w); 195 << 196 { << 197 struct z3fold_buddy_slots *slots = kme << 198 << 199 << 200 if (slots) { << 201 /* It will be freed separately << 202 kmemleak_not_leak(slots); << 203 slots->pool = (unsigned long)p << 204 rwlock_init(&slots->lock); << 205 } << 206 << 207 return slots; << 208 } << 209 << 210 static inline struct z3fold_pool *slots_to_poo << 211 { << 212 return (struct z3fold_pool *)(s->pool << 213 } << 214 << 215 static inline struct z3fold_buddy_slots *handl << 216 { << 217 return (struct z3fold_buddy_slots *)(h << 218 } << 219 << 220 /* Lock a z3fold page */ << 221 static inline void z3fold_page_lock(struct z3f << 222 { << 223 spin_lock(&zhdr->page_lock); << 224 } << 225 << 226 /* Try to lock a z3fold page */ << 227 static inline int z3fold_page_trylock(struct z << 228 { << 229 return spin_trylock(&zhdr->page_lock); << 230 } << 231 << 232 /* Unlock a z3fold page */ << 233 static inline void z3fold_page_unlock(struct z << 234 { << 235 spin_unlock(&zhdr->page_lock); << 236 } << 237 << 238 /* return locked z3fold page if it's not headl << 239 static inline struct z3fold_header *get_z3fold << 240 { << 241 struct z3fold_buddy_slots *slots; << 242 struct z3fold_header *zhdr; << 243 int locked = 0; << 244 << 245 if (!(handle & (1 << PAGE_HEADLESS))) << 246 slots = handle_to_slots(handle << 247 do { << 248 unsigned long addr; << 249 << 250 read_lock(&slots->lock << 251 addr = *(unsigned long << 252 zhdr = (struct z3fold_ << 253 locked = z3fold_page_t << 254 read_unlock(&slots->lo << 255 if (locked) { << 256 struct page *p << 257 << 258 if (!test_bit( << 259 break; << 260 z3fold_page_un << 261 } << 262 cpu_relax(); << 263 } while (true); << 264 } else { << 265 zhdr = (struct z3fold_header * << 266 } << 267 << 268 return zhdr; << 269 } << 270 << 271 static inline void put_z3fold_header(struct z3 << 272 { << 273 struct page *page = virt_to_page(zhdr) << 274 << 275 if (!test_bit(PAGE_HEADLESS, &page->pr << 276 z3fold_page_unlock(zhdr); << 277 } << 278 << 279 static inline void free_handle(unsigned long h << 280 { << 281 struct z3fold_buddy_slots *slots; << 282 int i; << 283 bool is_free; << 284 << 285 if (WARN_ON(*(unsigned long *)handle = << 286 return; << 287 << 288 slots = handle_to_slots(handle); << 289 write_lock(&slots->lock); << 290 *(unsigned long *)handle = 0; << 291 << 292 if (test_bit(HANDLES_NOFREE, &slots->p << 293 write_unlock(&slots->lock); << 294 return; /* simple case, nothin << 295 } << 296 << 297 if (zhdr->slots != slots) << 298 zhdr->foreign_handles--; << 299 << 300 is_free = true; << 301 for (i = 0; i <= BUDDY_MASK; i++) { << 302 if (slots->slot[i]) { << 303 is_free = false; << 304 break; << 305 } << 306 } << 307 write_unlock(&slots->lock); << 308 << 309 if (is_free) { << 310 struct z3fold_pool *pool = slo << 311 << 312 if (zhdr->slots == slots) << 313 zhdr->slots = NULL; << 314 kmem_cache_free(pool->c_handle << 315 } << 316 } << 317 164 318 /* Initializes the z3fold header of a newly al 165 /* Initializes the z3fold header of a newly allocated z3fold page */ 319 static struct z3fold_header *init_z3fold_page( !! 166 static struct z3fold_header *init_z3fold_page(struct page *page, 320 struct !! 167 struct z3fold_pool *pool) 321 { 168 { 322 struct z3fold_header *zhdr = page_addr 169 struct z3fold_header *zhdr = page_address(page); 323 struct z3fold_buddy_slots *slots; << 324 170 >> 171 INIT_LIST_HEAD(&page->lru); 325 clear_bit(PAGE_HEADLESS, &page->privat 172 clear_bit(PAGE_HEADLESS, &page->private); 326 clear_bit(MIDDLE_CHUNK_MAPPED, &page-> 173 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 327 clear_bit(NEEDS_COMPACTING, &page->pri 174 clear_bit(NEEDS_COMPACTING, &page->private); 328 clear_bit(PAGE_STALE, &page->private); 175 clear_bit(PAGE_STALE, &page->private); 329 clear_bit(PAGE_CLAIMED, &page->private << 330 clear_bit(PAGE_MIGRATED, &page->privat << 331 if (headless) << 332 return zhdr; << 333 << 334 slots = alloc_slots(pool, gfp); << 335 if (!slots) << 336 return NULL; << 337 176 338 memset(zhdr, 0, sizeof(*zhdr)); << 339 spin_lock_init(&zhdr->page_lock); 177 spin_lock_init(&zhdr->page_lock); 340 kref_init(&zhdr->refcount); 178 kref_init(&zhdr->refcount); >> 179 zhdr->first_chunks = 0; >> 180 zhdr->middle_chunks = 0; >> 181 zhdr->last_chunks = 0; >> 182 zhdr->first_num = 0; >> 183 zhdr->start_middle = 0; 341 zhdr->cpu = -1; 184 zhdr->cpu = -1; 342 zhdr->slots = slots; << 343 zhdr->pool = pool; 185 zhdr->pool = pool; 344 INIT_LIST_HEAD(&zhdr->buddy); 186 INIT_LIST_HEAD(&zhdr->buddy); 345 INIT_WORK(&zhdr->work, compact_page_wo 187 INIT_WORK(&zhdr->work, compact_page_work); 346 return zhdr; 188 return zhdr; 347 } 189 } 348 190 349 /* Resets the struct page fields and frees the 191 /* Resets the struct page fields and frees the page */ 350 static void free_z3fold_page(struct page *page !! 192 static void free_z3fold_page(struct page *page) 351 { 193 { 352 if (!headless) { << 353 lock_page(page); << 354 __ClearPageMovable(page); << 355 unlock_page(page); << 356 } << 357 __free_page(page); 194 __free_page(page); 358 } 195 } 359 196 360 /* Helper function to build the index */ !! 197 /* Lock a z3fold page */ 361 static inline int __idx(struct z3fold_header * !! 198 static inline void z3fold_page_lock(struct z3fold_header *zhdr) 362 { 199 { 363 return (bud + zhdr->first_num) & BUDDY !! 200 spin_lock(&zhdr->page_lock); 364 } 201 } 365 202 366 /* !! 203 /* Try to lock a z3fold page */ 367 * Encodes the handle of a particular buddy wi !! 204 static inline int z3fold_page_trylock(struct z3fold_header *zhdr) 368 * Zhdr->page_lock should be held as this func << 369 * if bud != HEADLESS. << 370 */ << 371 static unsigned long __encode_handle(struct z3 << 372 struct z3fold_ << 373 enum buddy bud << 374 { 205 { 375 unsigned long h = (unsigned long)zhdr; !! 206 return spin_trylock(&zhdr->page_lock); 376 int idx = 0; !! 207 } 377 << 378 /* << 379 * For a headless page, its handle is << 380 * PAGE_HEADLESS bit set << 381 */ << 382 if (bud == HEADLESS) << 383 return h | (1 << PAGE_HEADLESS << 384 208 385 /* otherwise, return pointer to encode !! 209 /* Unlock a z3fold page */ 386 idx = __idx(zhdr, bud); !! 210 static inline void z3fold_page_unlock(struct z3fold_header *zhdr) 387 h += idx; !! 211 { 388 if (bud == LAST) !! 212 spin_unlock(&zhdr->page_lock); 389 h |= (zhdr->last_chunks << BUD << 390 << 391 write_lock(&slots->lock); << 392 slots->slot[idx] = h; << 393 write_unlock(&slots->lock); << 394 return (unsigned long)&slots->slot[idx << 395 } 213 } 396 214 >> 215 /* >> 216 * Encodes the handle of a particular buddy within a z3fold page >> 217 * Pool lock should be held as this function accesses first_num >> 218 */ 397 static unsigned long encode_handle(struct z3fo 219 static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 398 { 220 { 399 return __encode_handle(zhdr, zhdr->slo !! 221 unsigned long handle; >> 222 >> 223 handle = (unsigned long)zhdr; >> 224 if (bud != HEADLESS) >> 225 handle += (bud + zhdr->first_num) & BUDDY_MASK; >> 226 return handle; 400 } 227 } 401 228 402 /* only for LAST bud, returns zero otherwise * !! 229 /* Returns the z3fold page where a given handle is stored */ 403 static unsigned short handle_to_chunks(unsigne !! 230 static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) 404 { 231 { 405 struct z3fold_buddy_slots *slots = han !! 232 return (struct z3fold_header *)(handle & PAGE_MASK); 406 unsigned long addr; << 407 << 408 read_lock(&slots->lock); << 409 addr = *(unsigned long *)handle; << 410 read_unlock(&slots->lock); << 411 return (addr & ~PAGE_MASK) >> BUDDY_SH << 412 } 233 } 413 234 414 /* 235 /* 415 * (handle & BUDDY_MASK) < zhdr->first_num is 236 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 416 * but that doesn't matter. because the maski 237 * but that doesn't matter. because the masking will result in the 417 * correct buddy number. 238 * correct buddy number. 418 */ 239 */ 419 static enum buddy handle_to_buddy(unsigned lon 240 static enum buddy handle_to_buddy(unsigned long handle) 420 { 241 { 421 struct z3fold_header *zhdr; !! 242 struct z3fold_header *zhdr = handle_to_z3fold_header(handle); 422 struct z3fold_buddy_slots *slots = han !! 243 return (handle - zhdr->first_num) & BUDDY_MASK; 423 unsigned long addr; << 424 << 425 read_lock(&slots->lock); << 426 WARN_ON(handle & (1 << PAGE_HEADLESS)) << 427 addr = *(unsigned long *)handle; << 428 read_unlock(&slots->lock); << 429 zhdr = (struct z3fold_header *)(addr & << 430 return (addr - zhdr->first_num) & BUDD << 431 } << 432 << 433 static inline struct z3fold_pool *zhdr_to_pool << 434 { << 435 return zhdr->pool; << 436 } 244 } 437 245 438 static void __release_z3fold_page(struct z3fol 246 static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 439 { 247 { 440 struct page *page = virt_to_page(zhdr) 248 struct page *page = virt_to_page(zhdr); 441 struct z3fold_pool *pool = zhdr_to_poo !! 249 struct z3fold_pool *pool = zhdr->pool; 442 250 443 WARN_ON(!list_empty(&zhdr->buddy)); 251 WARN_ON(!list_empty(&zhdr->buddy)); 444 set_bit(PAGE_STALE, &page->private); 252 set_bit(PAGE_STALE, &page->private); 445 clear_bit(NEEDS_COMPACTING, &page->pri 253 clear_bit(NEEDS_COMPACTING, &page->private); 446 spin_lock(&pool->lock); 254 spin_lock(&pool->lock); >> 255 if (!list_empty(&page->lru)) >> 256 list_del(&page->lru); 447 spin_unlock(&pool->lock); 257 spin_unlock(&pool->lock); 448 << 449 if (locked) 258 if (locked) 450 z3fold_page_unlock(zhdr); 259 z3fold_page_unlock(zhdr); 451 << 452 spin_lock(&pool->stale_lock); 260 spin_lock(&pool->stale_lock); 453 list_add(&zhdr->buddy, &pool->stale); 261 list_add(&zhdr->buddy, &pool->stale); 454 queue_work(pool->release_wq, &pool->wo 262 queue_work(pool->release_wq, &pool->work); 455 spin_unlock(&pool->stale_lock); 263 spin_unlock(&pool->stale_lock); >> 264 } 456 265 457 atomic64_dec(&pool->pages_nr); !! 266 static void __attribute__((__unused__)) >> 267 release_z3fold_page(struct kref *ref) >> 268 { >> 269 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, >> 270 refcount); >> 271 __release_z3fold_page(zhdr, false); 458 } 272 } 459 273 460 static void release_z3fold_page_locked(struct 274 static void release_z3fold_page_locked(struct kref *ref) 461 { 275 { 462 struct z3fold_header *zhdr = container 276 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 463 277 refcount); 464 WARN_ON(z3fold_page_trylock(zhdr)); 278 WARN_ON(z3fold_page_trylock(zhdr)); 465 __release_z3fold_page(zhdr, true); 279 __release_z3fold_page(zhdr, true); 466 } 280 } 467 281 468 static void release_z3fold_page_locked_list(st 282 static void release_z3fold_page_locked_list(struct kref *ref) 469 { 283 { 470 struct z3fold_header *zhdr = container 284 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 471 285 refcount); 472 struct z3fold_pool *pool = zhdr_to_poo !! 286 spin_lock(&zhdr->pool->lock); 473 << 474 spin_lock(&pool->lock); << 475 list_del_init(&zhdr->buddy); 287 list_del_init(&zhdr->buddy); 476 spin_unlock(&pool->lock); !! 288 spin_unlock(&zhdr->pool->lock); 477 289 478 WARN_ON(z3fold_page_trylock(zhdr)); 290 WARN_ON(z3fold_page_trylock(zhdr)); 479 __release_z3fold_page(zhdr, true); 291 __release_z3fold_page(zhdr, true); 480 } 292 } 481 293 482 static inline int put_z3fold_locked(struct z3f << 483 { << 484 return kref_put(&zhdr->refcount, relea << 485 } << 486 << 487 static inline int put_z3fold_locked_list(struc << 488 { << 489 return kref_put(&zhdr->refcount, relea << 490 } << 491 << 492 static void free_pages_work(struct work_struct 294 static void free_pages_work(struct work_struct *w) 493 { 295 { 494 struct z3fold_pool *pool = container_o 296 struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 495 297 496 spin_lock(&pool->stale_lock); 298 spin_lock(&pool->stale_lock); 497 while (!list_empty(&pool->stale)) { 299 while (!list_empty(&pool->stale)) { 498 struct z3fold_header *zhdr = l 300 struct z3fold_header *zhdr = list_first_entry(&pool->stale, 499 301 struct z3fold_header, buddy); 500 struct page *page = virt_to_pa 302 struct page *page = virt_to_page(zhdr); 501 303 502 list_del(&zhdr->buddy); 304 list_del(&zhdr->buddy); 503 if (WARN_ON(!test_bit(PAGE_STA 305 if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 504 continue; 306 continue; 505 spin_unlock(&pool->stale_lock) 307 spin_unlock(&pool->stale_lock); 506 cancel_work_sync(&zhdr->work); 308 cancel_work_sync(&zhdr->work); 507 free_z3fold_page(page, false); !! 309 free_z3fold_page(page); 508 cond_resched(); 310 cond_resched(); 509 spin_lock(&pool->stale_lock); 311 spin_lock(&pool->stale_lock); 510 } 312 } 511 spin_unlock(&pool->stale_lock); 313 spin_unlock(&pool->stale_lock); 512 } 314 } 513 315 514 /* 316 /* 515 * Returns the number of free chunks in a z3fo 317 * Returns the number of free chunks in a z3fold page. 516 * NB: can't be used with HEADLESS pages. 318 * NB: can't be used with HEADLESS pages. 517 */ 319 */ 518 static int num_free_chunks(struct z3fold_heade 320 static int num_free_chunks(struct z3fold_header *zhdr) 519 { 321 { 520 int nfree; 322 int nfree; 521 /* 323 /* 522 * If there is a middle object, pick u 324 * If there is a middle object, pick up the bigger free space 523 * either before or after it. Otherwis 325 * either before or after it. Otherwise just subtract the number 524 * of chunks occupied by the first and 326 * of chunks occupied by the first and the last objects. 525 */ 327 */ 526 if (zhdr->middle_chunks != 0) { 328 if (zhdr->middle_chunks != 0) { 527 int nfree_before = zhdr->first 329 int nfree_before = zhdr->first_chunks ? 528 0 : zhdr->start_middle 330 0 : zhdr->start_middle - ZHDR_CHUNKS; 529 int nfree_after = zhdr->last_c 331 int nfree_after = zhdr->last_chunks ? 530 0 : TOTAL_CHUNKS - 332 0 : TOTAL_CHUNKS - 531 (zhdr->start_m 333 (zhdr->start_middle + zhdr->middle_chunks); 532 nfree = max(nfree_before, nfre 334 nfree = max(nfree_before, nfree_after); 533 } else 335 } else 534 nfree = NCHUNKS - zhdr->first_ 336 nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 535 return nfree; 337 return nfree; 536 } 338 } 537 339 538 /* Add to the appropriate unbuddied list */ << 539 static inline void add_to_unbuddied(struct z3f << 540 struct z3fold_ << 541 { << 542 if (zhdr->first_chunks == 0 || zhdr->l << 543 zhdr->middle_chunks == << 544 struct list_head *unbuddied; << 545 int freechunks = num_free_chun << 546 << 547 migrate_disable(); << 548 unbuddied = this_cpu_ptr(pool- << 549 spin_lock(&pool->lock); << 550 list_add(&zhdr->buddy, &unbudd << 551 spin_unlock(&pool->lock); << 552 zhdr->cpu = smp_processor_id() << 553 migrate_enable(); << 554 } << 555 } << 556 << 557 static inline enum buddy get_free_buddy(struct << 558 { << 559 enum buddy bud = HEADLESS; << 560 << 561 if (zhdr->middle_chunks) { << 562 if (!zhdr->first_chunks && << 563 chunks <= zhdr->start_midd << 564 bud = FIRST; << 565 else if (!zhdr->last_chunks) << 566 bud = LAST; << 567 } else { << 568 if (!zhdr->first_chunks) << 569 bud = FIRST; << 570 else if (!zhdr->last_chunks) << 571 bud = LAST; << 572 else << 573 bud = MIDDLE; << 574 } << 575 << 576 return bud; << 577 } << 578 << 579 static inline void *mchunk_memmove(struct z3fo 340 static inline void *mchunk_memmove(struct z3fold_header *zhdr, 580 unsigned short 341 unsigned short dst_chunk) 581 { 342 { 582 void *beg = zhdr; 343 void *beg = zhdr; 583 return memmove(beg + (dst_chunk << CHU 344 return memmove(beg + (dst_chunk << CHUNK_SHIFT), 584 beg + (zhdr->start_midd 345 beg + (zhdr->start_middle << CHUNK_SHIFT), 585 zhdr->middle_chunks << 346 zhdr->middle_chunks << CHUNK_SHIFT); 586 } 347 } 587 348 588 static inline bool buddy_single(struct z3fold_ << 589 { << 590 return !((zhdr->first_chunks && zhdr-> << 591 (zhdr->first_chunks && << 592 (zhdr->middle_chunks & << 593 } << 594 << 595 static struct z3fold_header *compact_single_bu << 596 { << 597 struct z3fold_pool *pool = zhdr_to_poo << 598 void *p = zhdr; << 599 unsigned long old_handle = 0; << 600 size_t sz = 0; << 601 struct z3fold_header *new_zhdr = NULL; << 602 int first_idx = __idx(zhdr, FIRST); << 603 int middle_idx = __idx(zhdr, MIDDLE); << 604 int last_idx = __idx(zhdr, LAST); << 605 unsigned short *moved_chunks = NULL; << 606 << 607 /* << 608 * No need to protect slots here -- al << 609 * the page lock is already taken << 610 */ << 611 if (zhdr->first_chunks && zhdr->slots- << 612 p += ZHDR_SIZE_ALIGNED; << 613 sz = zhdr->first_chunks << CHU << 614 old_handle = (unsigned long)&z << 615 moved_chunks = &zhdr->first_ch << 616 } else if (zhdr->middle_chunks && zhdr << 617 p += zhdr->start_middle << CHU << 618 sz = zhdr->middle_chunks << CH << 619 old_handle = (unsigned long)&z << 620 moved_chunks = &zhdr->middle_c << 621 } else if (zhdr->last_chunks && zhdr-> << 622 p += PAGE_SIZE - (zhdr->last_c << 623 sz = zhdr->last_chunks << CHUN << 624 old_handle = (unsigned long)&z << 625 moved_chunks = &zhdr->last_chu << 626 } << 627 << 628 if (sz > 0) { << 629 enum buddy new_bud = HEADLESS; << 630 short chunks = size_to_chunks( << 631 void *q; << 632 << 633 new_zhdr = __z3fold_alloc(pool << 634 if (!new_zhdr) << 635 return NULL; << 636 << 637 if (WARN_ON(new_zhdr == zhdr)) << 638 goto out_fail; << 639 << 640 new_bud = get_free_buddy(new_z << 641 q = new_zhdr; << 642 switch (new_bud) { << 643 case FIRST: << 644 new_zhdr->first_chunks << 645 q += ZHDR_SIZE_ALIGNED << 646 break; << 647 case MIDDLE: << 648 new_zhdr->middle_chunk << 649 new_zhdr->start_middle << 650 new_zhdr->firs << 651 q += new_zhdr->start_m << 652 break; << 653 case LAST: << 654 new_zhdr->last_chunks << 655 q += PAGE_SIZE - (new_ << 656 break; << 657 default: << 658 goto out_fail; << 659 } << 660 new_zhdr->foreign_handles++; << 661 memcpy(q, p, sz); << 662 write_lock(&zhdr->slots->lock) << 663 *(unsigned long *)old_handle = << 664 __idx(new_zhdr, new_bu << 665 if (new_bud == LAST) << 666 *(unsigned long *)old_ << 667 (new_z << 668 write_unlock(&zhdr->slots->loc << 669 add_to_unbuddied(pool, new_zhd << 670 z3fold_page_unlock(new_zhdr); << 671 << 672 *moved_chunks = 0; << 673 } << 674 << 675 return new_zhdr; << 676 << 677 out_fail: << 678 if (new_zhdr && !put_z3fold_locked(new << 679 add_to_unbuddied(pool, new_zhd << 680 z3fold_page_unlock(new_zhdr); << 681 } << 682 return NULL; << 683 << 684 } << 685 << 686 #define BIG_CHUNK_GAP 3 349 #define BIG_CHUNK_GAP 3 687 /* Has to be called with lock held */ 350 /* Has to be called with lock held */ 688 static int z3fold_compact_page(struct z3fold_h 351 static int z3fold_compact_page(struct z3fold_header *zhdr) 689 { 352 { 690 struct page *page = virt_to_page(zhdr) 353 struct page *page = virt_to_page(zhdr); 691 354 692 if (test_bit(MIDDLE_CHUNK_MAPPED, &pag 355 if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 693 return 0; /* can't move middle 356 return 0; /* can't move middle chunk, it's used */ 694 357 695 if (unlikely(PageIsolated(page))) << 696 return 0; << 697 << 698 if (zhdr->middle_chunks == 0) 358 if (zhdr->middle_chunks == 0) 699 return 0; /* nothing to compac 359 return 0; /* nothing to compact */ 700 360 701 if (zhdr->first_chunks == 0 && zhdr->l 361 if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 702 /* move to the beginning */ 362 /* move to the beginning */ 703 mchunk_memmove(zhdr, ZHDR_CHUN 363 mchunk_memmove(zhdr, ZHDR_CHUNKS); 704 zhdr->first_chunks = zhdr->mid 364 zhdr->first_chunks = zhdr->middle_chunks; 705 zhdr->middle_chunks = 0; 365 zhdr->middle_chunks = 0; 706 zhdr->start_middle = 0; 366 zhdr->start_middle = 0; 707 zhdr->first_num++; 367 zhdr->first_num++; 708 return 1; 368 return 1; 709 } 369 } 710 370 711 /* 371 /* 712 * moving data is expensive, so let's 372 * moving data is expensive, so let's only do that if 713 * there's substantial gain (at least 373 * there's substantial gain (at least BIG_CHUNK_GAP chunks) 714 */ 374 */ 715 if (zhdr->first_chunks != 0 && zhdr->l 375 if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 716 zhdr->start_middle - (zhdr->first_ 376 zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 717 BIG_CHUNK_GAP) { 377 BIG_CHUNK_GAP) { 718 mchunk_memmove(zhdr, zhdr->fir 378 mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 719 zhdr->start_middle = zhdr->fir 379 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 720 return 1; 380 return 1; 721 } else if (zhdr->last_chunks != 0 && z 381 } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 722 TOTAL_CHUNKS - (zhdr->last_ 382 TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 723 + zhdr 383 + zhdr->middle_chunks) >= 724 BIG_CHUNK_GAP) { 384 BIG_CHUNK_GAP) { 725 unsigned short new_start = TOT 385 unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 726 zhdr->middle_chunks; 386 zhdr->middle_chunks; 727 mchunk_memmove(zhdr, new_start 387 mchunk_memmove(zhdr, new_start); 728 zhdr->start_middle = new_start 388 zhdr->start_middle = new_start; 729 return 1; 389 return 1; 730 } 390 } 731 391 732 return 0; 392 return 0; 733 } 393 } 734 394 735 static void do_compact_page(struct z3fold_head 395 static void do_compact_page(struct z3fold_header *zhdr, bool locked) 736 { 396 { 737 struct z3fold_pool *pool = zhdr_to_poo !! 397 struct z3fold_pool *pool = zhdr->pool; 738 struct page *page; 398 struct page *page; >> 399 struct list_head *unbuddied; >> 400 int fchunks; 739 401 740 page = virt_to_page(zhdr); 402 page = virt_to_page(zhdr); 741 if (locked) 403 if (locked) 742 WARN_ON(z3fold_page_trylock(zh 404 WARN_ON(z3fold_page_trylock(zhdr)); 743 else 405 else 744 z3fold_page_lock(zhdr); 406 z3fold_page_lock(zhdr); 745 if (WARN_ON(!test_and_clear_bit(NEEDS_ 407 if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 746 z3fold_page_unlock(zhdr); 408 z3fold_page_unlock(zhdr); 747 return; 409 return; 748 } 410 } 749 spin_lock(&pool->lock); 411 spin_lock(&pool->lock); 750 list_del_init(&zhdr->buddy); 412 list_del_init(&zhdr->buddy); 751 spin_unlock(&pool->lock); 413 spin_unlock(&pool->lock); 752 414 753 if (put_z3fold_locked(zhdr)) !! 415 if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { 754 return; !! 416 atomic64_dec(&pool->pages_nr); 755 << 756 if (test_bit(PAGE_STALE, &page->privat << 757 test_and_set_bit(PAGE_CLAIMED, &pa << 758 z3fold_page_unlock(zhdr); << 759 return; << 760 } << 761 << 762 if (!zhdr->foreign_handles && buddy_si << 763 zhdr->mapped_count == 0 && compact << 764 if (!put_z3fold_locked(zhdr)) << 765 clear_bit(PAGE_CLAIMED << 766 z3fold_page_unlock(zhd << 767 } << 768 return; 417 return; 769 } 418 } 770 419 771 z3fold_compact_page(zhdr); 420 z3fold_compact_page(zhdr); 772 add_to_unbuddied(pool, zhdr); !! 421 unbuddied = get_cpu_ptr(pool->unbuddied); 773 clear_bit(PAGE_CLAIMED, &page->private !! 422 fchunks = num_free_chunks(zhdr); >> 423 if (fchunks < NCHUNKS && >> 424 (!zhdr->first_chunks || !zhdr->middle_chunks || >> 425 !zhdr->last_chunks)) { >> 426 /* the page's not completely free and it's unbuddied */ >> 427 spin_lock(&pool->lock); >> 428 list_add(&zhdr->buddy, &unbuddied[fchunks]); >> 429 spin_unlock(&pool->lock); >> 430 zhdr->cpu = smp_processor_id(); >> 431 } >> 432 put_cpu_ptr(pool->unbuddied); 774 z3fold_page_unlock(zhdr); 433 z3fold_page_unlock(zhdr); 775 } 434 } 776 435 777 static void compact_page_work(struct work_stru 436 static void compact_page_work(struct work_struct *w) 778 { 437 { 779 struct z3fold_header *zhdr = container 438 struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 780 439 work); 781 440 782 do_compact_page(zhdr, false); 441 do_compact_page(zhdr, false); 783 } 442 } 784 443 785 /* returns _locked_ z3fold page header or NULL << 786 static inline struct z3fold_header *__z3fold_a << 787 << 788 { << 789 struct z3fold_header *zhdr = NULL; << 790 struct page *page; << 791 struct list_head *unbuddied; << 792 int chunks = size_to_chunks(size), i; << 793 << 794 lookup: << 795 migrate_disable(); << 796 /* First, try to find an unbuddied z3f << 797 unbuddied = this_cpu_ptr(pool->unbuddi << 798 for_each_unbuddied_list(i, chunks) { << 799 struct list_head *l = &unbuddi << 800 << 801 zhdr = list_first_entry_or_nul << 802 struct << 803 << 804 if (!zhdr) << 805 continue; << 806 << 807 /* Re-check under lock. */ << 808 spin_lock(&pool->lock); << 809 if (unlikely(zhdr != list_firs << 810 << 811 !z3fold_page_trylock(zhdr) << 812 spin_unlock(&pool->loc << 813 zhdr = NULL; << 814 migrate_enable(); << 815 if (can_sleep) << 816 cond_resched() << 817 goto lookup; << 818 } << 819 list_del_init(&zhdr->buddy); << 820 zhdr->cpu = -1; << 821 spin_unlock(&pool->lock); << 822 << 823 page = virt_to_page(zhdr); << 824 if (test_bit(NEEDS_COMPACTING, << 825 test_bit(PAGE_CLAIMED, &pa << 826 z3fold_page_unlock(zhd << 827 zhdr = NULL; << 828 migrate_enable(); << 829 if (can_sleep) << 830 cond_resched() << 831 goto lookup; << 832 } << 833 << 834 /* << 835 * this page could not be remo << 836 * list while pool lock was he << 837 * page lock so kref_put could << 838 * we got here, so it's safe t << 839 */ << 840 kref_get(&zhdr->refcount); << 841 break; << 842 } << 843 migrate_enable(); << 844 << 845 if (!zhdr) { << 846 int cpu; << 847 << 848 /* look for _exact_ match on o << 849 for_each_online_cpu(cpu) { << 850 struct list_head *l; << 851 << 852 unbuddied = per_cpu_pt << 853 spin_lock(&pool->lock) << 854 l = &unbuddied[chunks] << 855 << 856 zhdr = list_first_entr << 857 << 858 << 859 if (!zhdr || !z3fold_p << 860 spin_unlock(&p << 861 zhdr = NULL; << 862 continue; << 863 } << 864 list_del_init(&zhdr->b << 865 zhdr->cpu = -1; << 866 spin_unlock(&pool->loc << 867 << 868 page = virt_to_page(zh << 869 if (test_bit(NEEDS_COM << 870 test_bit(PAGE_CLAI << 871 z3fold_page_un << 872 zhdr = NULL; << 873 if (can_sleep) << 874 cond_r << 875 continue; << 876 } << 877 kref_get(&zhdr->refcou << 878 break; << 879 } << 880 } << 881 << 882 if (zhdr && !zhdr->slots) { << 883 zhdr->slots = alloc_slots(pool << 884 if (!zhdr->slots) << 885 goto out_fail; << 886 } << 887 return zhdr; << 888 << 889 out_fail: << 890 if (!put_z3fold_locked(zhdr)) { << 891 add_to_unbuddied(pool, zhdr); << 892 z3fold_page_unlock(zhdr); << 893 } << 894 return NULL; << 895 } << 896 444 897 /* 445 /* 898 * API Functions 446 * API Functions 899 */ 447 */ 900 448 901 /** 449 /** 902 * z3fold_create_pool() - create a new z3fold 450 * z3fold_create_pool() - create a new z3fold pool 903 * @name: pool name 451 * @name: pool name 904 * @gfp: gfp flags when allocating the 452 * @gfp: gfp flags when allocating the z3fold pool structure >> 453 * @ops: user-defined operations for the z3fold pool 905 * 454 * 906 * Return: pointer to the new z3fold pool or N 455 * Return: pointer to the new z3fold pool or NULL if the metadata allocation 907 * failed. 456 * failed. 908 */ 457 */ 909 static struct z3fold_pool *z3fold_create_pool( !! 458 static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, >> 459 const struct z3fold_ops *ops) 910 { 460 { 911 struct z3fold_pool *pool = NULL; 461 struct z3fold_pool *pool = NULL; 912 int i, cpu; 462 int i, cpu; 913 463 914 pool = kzalloc(sizeof(struct z3fold_po 464 pool = kzalloc(sizeof(struct z3fold_pool), gfp); 915 if (!pool) 465 if (!pool) 916 goto out; 466 goto out; 917 pool->c_handle = kmem_cache_create("z3 << 918 sizeof(struct << 919 SLOTS_ALIGN, 0 << 920 if (!pool->c_handle) << 921 goto out_c; << 922 spin_lock_init(&pool->lock); 467 spin_lock_init(&pool->lock); 923 spin_lock_init(&pool->stale_lock); 468 spin_lock_init(&pool->stale_lock); 924 pool->unbuddied = __alloc_percpu(sizeo !! 469 pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); 925 __ali << 926 if (!pool->unbuddied) << 927 goto out_pool; << 928 for_each_possible_cpu(cpu) { 470 for_each_possible_cpu(cpu) { 929 struct list_head *unbuddied = 471 struct list_head *unbuddied = 930 per_cpu_ptr(po 472 per_cpu_ptr(pool->unbuddied, cpu); 931 for_each_unbuddied_list(i, 0) 473 for_each_unbuddied_list(i, 0) 932 INIT_LIST_HEAD(&unbudd 474 INIT_LIST_HEAD(&unbuddied[i]); 933 } 475 } >> 476 INIT_LIST_HEAD(&pool->lru); 934 INIT_LIST_HEAD(&pool->stale); 477 INIT_LIST_HEAD(&pool->stale); 935 atomic64_set(&pool->pages_nr, 0); 478 atomic64_set(&pool->pages_nr, 0); 936 pool->name = name; 479 pool->name = name; 937 pool->compact_wq = create_singlethread 480 pool->compact_wq = create_singlethread_workqueue(pool->name); 938 if (!pool->compact_wq) 481 if (!pool->compact_wq) 939 goto out_unbuddied; !! 482 goto out; 940 pool->release_wq = create_singlethread 483 pool->release_wq = create_singlethread_workqueue(pool->name); 941 if (!pool->release_wq) 484 if (!pool->release_wq) 942 goto out_wq; 485 goto out_wq; 943 INIT_WORK(&pool->work, free_pages_work 486 INIT_WORK(&pool->work, free_pages_work); >> 487 pool->ops = ops; 944 return pool; 488 return pool; 945 489 946 out_wq: 490 out_wq: 947 destroy_workqueue(pool->compact_wq); 491 destroy_workqueue(pool->compact_wq); 948 out_unbuddied: << 949 free_percpu(pool->unbuddied); << 950 out_pool: << 951 kmem_cache_destroy(pool->c_handle); << 952 out_c: << 953 kfree(pool); << 954 out: 492 out: >> 493 kfree(pool); 955 return NULL; 494 return NULL; 956 } 495 } 957 496 958 /** 497 /** 959 * z3fold_destroy_pool() - destroys an existin 498 * z3fold_destroy_pool() - destroys an existing z3fold pool 960 * @pool: the z3fold pool to be destroye 499 * @pool: the z3fold pool to be destroyed 961 * 500 * 962 * The pool should be emptied before this func 501 * The pool should be emptied before this function is called. 963 */ 502 */ 964 static void z3fold_destroy_pool(struct z3fold_ 503 static void z3fold_destroy_pool(struct z3fold_pool *pool) 965 { 504 { 966 kmem_cache_destroy(pool->c_handle); << 967 << 968 /* << 969 * We need to destroy pool->compact_wq << 970 * as any pending work on pool->compac << 971 * queue_work(pool->release_wq, &pool- << 972 * << 973 * There are still outstanding pages u << 974 * so we cannot unregister migration u << 975 */ << 976 << 977 destroy_workqueue(pool->compact_wq); << 978 destroy_workqueue(pool->release_wq); 505 destroy_workqueue(pool->release_wq); 979 free_percpu(pool->unbuddied); !! 506 destroy_workqueue(pool->compact_wq); 980 kfree(pool); 507 kfree(pool); 981 } 508 } 982 509 983 static const struct movable_operations z3fold_ << 984 << 985 /** 510 /** 986 * z3fold_alloc() - allocates a region of a gi 511 * z3fold_alloc() - allocates a region of a given size 987 * @pool: z3fold pool from which to allo 512 * @pool: z3fold pool from which to allocate 988 * @size: size in bytes of the desired a 513 * @size: size in bytes of the desired allocation 989 * @gfp: gfp flags used if the pool nee 514 * @gfp: gfp flags used if the pool needs to grow 990 * @handle: handle of the new allocation 515 * @handle: handle of the new allocation 991 * 516 * 992 * This function will attempt to find a free r 517 * This function will attempt to find a free region in the pool large enough to 993 * satisfy the allocation request. A search o 518 * satisfy the allocation request. A search of the unbuddied lists is 994 * performed first. If no suitable free region 519 * performed first. If no suitable free region is found, then a new page is 995 * allocated and added to the pool to satisfy 520 * allocated and added to the pool to satisfy the request. 996 * 521 * >> 522 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used >> 523 * as z3fold pool pages. >> 524 * 997 * Return: 0 if success and handle is set, oth 525 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or 998 * gfp arguments are invalid or -ENOMEM if the 526 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 999 * a new page. 527 * a new page. 1000 */ 528 */ 1001 static int z3fold_alloc(struct z3fold_pool *p 529 static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, 1002 unsigned long *handle 530 unsigned long *handle) 1003 { 531 { 1004 int chunks = size_to_chunks(size); !! 532 int chunks = 0, i, freechunks; 1005 struct z3fold_header *zhdr = NULL; 533 struct z3fold_header *zhdr = NULL; 1006 struct page *page = NULL; 534 struct page *page = NULL; 1007 enum buddy bud; 535 enum buddy bud; 1008 bool can_sleep = gfpflags_allow_block !! 536 bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM; 1009 537 1010 if (!size || (gfp & __GFP_HIGHMEM)) 538 if (!size || (gfp & __GFP_HIGHMEM)) 1011 return -EINVAL; 539 return -EINVAL; 1012 540 1013 if (size > PAGE_SIZE) 541 if (size > PAGE_SIZE) 1014 return -ENOSPC; 542 return -ENOSPC; 1015 543 1016 if (size > PAGE_SIZE - ZHDR_SIZE_ALIG 544 if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) 1017 bud = HEADLESS; 545 bud = HEADLESS; 1018 else { 546 else { 1019 retry: !! 547 struct list_head *unbuddied; 1020 zhdr = __z3fold_alloc(pool, s !! 548 chunks = size_to_chunks(size); >> 549 >> 550 lookup: >> 551 /* First, try to find an unbuddied z3fold page. */ >> 552 unbuddied = get_cpu_ptr(pool->unbuddied); >> 553 for_each_unbuddied_list(i, chunks) { >> 554 struct list_head *l = &unbuddied[i]; >> 555 >> 556 zhdr = list_first_entry_or_null(READ_ONCE(l), >> 557 struct z3fold_header, buddy); >> 558 >> 559 if (!zhdr) >> 560 continue; >> 561 >> 562 /* Re-check under lock. */ >> 563 spin_lock(&pool->lock); >> 564 l = &unbuddied[i]; >> 565 if (unlikely(zhdr != list_first_entry(READ_ONCE(l), >> 566 struct z3fold_header, buddy)) || >> 567 !z3fold_page_trylock(zhdr)) { >> 568 spin_unlock(&pool->lock); >> 569 put_cpu_ptr(pool->unbuddied); >> 570 goto lookup; >> 571 } >> 572 list_del_init(&zhdr->buddy); >> 573 zhdr->cpu = -1; >> 574 spin_unlock(&pool->lock); >> 575 >> 576 page = virt_to_page(zhdr); >> 577 if (test_bit(NEEDS_COMPACTING, &page->private)) { >> 578 z3fold_page_unlock(zhdr); >> 579 zhdr = NULL; >> 580 put_cpu_ptr(pool->unbuddied); >> 581 if (can_sleep) >> 582 cond_resched(); >> 583 goto lookup; >> 584 } >> 585 >> 586 /* >> 587 * this page could not be removed from its unbuddied >> 588 * list while pool lock was held, and then we've taken >> 589 * page lock so kref_put could not be called before >> 590 * we got here, so it's safe to just call kref_get() >> 591 */ >> 592 kref_get(&zhdr->refcount); >> 593 break; >> 594 } >> 595 put_cpu_ptr(pool->unbuddied); >> 596 1021 if (zhdr) { 597 if (zhdr) { 1022 bud = get_free_buddy( !! 598 if (zhdr->first_chunks == 0) { 1023 if (bud == HEADLESS) !! 599 if (zhdr->middle_chunks != 0 && 1024 if (!put_z3fo !! 600 chunks >= zhdr->start_middle) >> 601 bud = LAST; >> 602 else >> 603 bud = FIRST; >> 604 } else if (zhdr->last_chunks == 0) >> 605 bud = LAST; >> 606 else if (zhdr->middle_chunks == 0) >> 607 bud = MIDDLE; >> 608 else { >> 609 if (kref_put(&zhdr->refcount, >> 610 release_z3fold_page_locked)) >> 611 atomic64_dec(&pool->pages_nr); >> 612 else 1025 z3fol 613 z3fold_page_unlock(zhdr); 1026 pr_err("No fr 614 pr_err("No free chunks in unbuddied\n"); 1027 WARN_ON(1); 615 WARN_ON(1); 1028 goto retry; !! 616 goto lookup; 1029 } 617 } 1030 page = virt_to_page(z << 1031 goto found; 618 goto found; 1032 } 619 } 1033 bud = FIRST; 620 bud = FIRST; 1034 } 621 } 1035 622 1036 page = alloc_page(gfp); !! 623 spin_lock(&pool->stale_lock); >> 624 zhdr = list_first_entry_or_null(&pool->stale, >> 625 struct z3fold_header, buddy); >> 626 /* >> 627 * Before allocating a page, let's see if we can take one from the >> 628 * stale pages list. cancel_work_sync() can sleep so we must make >> 629 * sure it won't be called in case we're in atomic context. >> 630 */ >> 631 if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { >> 632 list_del(&zhdr->buddy); >> 633 spin_unlock(&pool->stale_lock); >> 634 if (can_sleep) >> 635 cancel_work_sync(&zhdr->work); >> 636 page = virt_to_page(zhdr); >> 637 } else { >> 638 spin_unlock(&pool->stale_lock); >> 639 page = alloc_page(gfp); >> 640 } >> 641 1037 if (!page) 642 if (!page) 1038 return -ENOMEM; 643 return -ENOMEM; 1039 644 1040 zhdr = init_z3fold_page(page, bud == << 1041 if (!zhdr) { << 1042 __free_page(page); << 1043 return -ENOMEM; << 1044 } << 1045 atomic64_inc(&pool->pages_nr); 645 atomic64_inc(&pool->pages_nr); >> 646 zhdr = init_z3fold_page(page, pool); 1046 647 1047 if (bud == HEADLESS) { 648 if (bud == HEADLESS) { 1048 set_bit(PAGE_HEADLESS, &page- 649 set_bit(PAGE_HEADLESS, &page->private); 1049 goto headless; 650 goto headless; 1050 } 651 } 1051 if (can_sleep) { << 1052 lock_page(page); << 1053 __SetPageMovable(page, &z3fol << 1054 unlock_page(page); << 1055 } else { << 1056 WARN_ON(!trylock_page(page)); << 1057 __SetPageMovable(page, &z3fol << 1058 unlock_page(page); << 1059 } << 1060 z3fold_page_lock(zhdr); 652 z3fold_page_lock(zhdr); 1061 653 1062 found: 654 found: 1063 if (bud == FIRST) 655 if (bud == FIRST) 1064 zhdr->first_chunks = chunks; 656 zhdr->first_chunks = chunks; 1065 else if (bud == LAST) 657 else if (bud == LAST) 1066 zhdr->last_chunks = chunks; 658 zhdr->last_chunks = chunks; 1067 else { 659 else { 1068 zhdr->middle_chunks = chunks; 660 zhdr->middle_chunks = chunks; 1069 zhdr->start_middle = zhdr->fi 661 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 1070 } 662 } 1071 add_to_unbuddied(pool, zhdr); !! 663 >> 664 if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || >> 665 zhdr->middle_chunks == 0) { >> 666 struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied); >> 667 >> 668 /* Add to unbuddied list */ >> 669 freechunks = num_free_chunks(zhdr); >> 670 spin_lock(&pool->lock); >> 671 list_add(&zhdr->buddy, &unbuddied[freechunks]); >> 672 spin_unlock(&pool->lock); >> 673 zhdr->cpu = smp_processor_id(); >> 674 put_cpu_ptr(pool->unbuddied); >> 675 } 1072 676 1073 headless: 677 headless: 1074 spin_lock(&pool->lock); 678 spin_lock(&pool->lock); >> 679 /* Add/move z3fold page to beginning of LRU */ >> 680 if (!list_empty(&page->lru)) >> 681 list_del(&page->lru); >> 682 >> 683 list_add(&page->lru, &pool->lru); >> 684 1075 *handle = encode_handle(zhdr, bud); 685 *handle = encode_handle(zhdr, bud); 1076 spin_unlock(&pool->lock); 686 spin_unlock(&pool->lock); 1077 if (bud != HEADLESS) 687 if (bud != HEADLESS) 1078 z3fold_page_unlock(zhdr); 688 z3fold_page_unlock(zhdr); 1079 689 1080 return 0; 690 return 0; 1081 } 691 } 1082 692 1083 /** 693 /** 1084 * z3fold_free() - frees the allocation assoc 694 * z3fold_free() - frees the allocation associated with the given handle 1085 * @pool: pool in which the allocation 695 * @pool: pool in which the allocation resided 1086 * @handle: handle associated with the al 696 * @handle: handle associated with the allocation returned by z3fold_alloc() 1087 * 697 * 1088 * In the case that the z3fold page in which 698 * In the case that the z3fold page in which the allocation resides is under 1089 * reclaim, as indicated by the PAGE_CLAIMED !! 699 * reclaim, as indicated by the PG_reclaim flag being set, this function 1090 * only sets the first|middle|last_chunks to !! 700 * only sets the first|last_chunks to 0. The page is actually freed 1091 * once all buddies are evicted (see z3fold_r !! 701 * once both buddies are evicted (see z3fold_reclaim_page() below). 1092 */ 702 */ 1093 static void z3fold_free(struct z3fold_pool *p 703 static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) 1094 { 704 { 1095 struct z3fold_header *zhdr; 705 struct z3fold_header *zhdr; 1096 struct page *page; 706 struct page *page; 1097 enum buddy bud; 707 enum buddy bud; 1098 bool page_claimed; << 1099 708 1100 zhdr = get_z3fold_header(handle); !! 709 zhdr = handle_to_z3fold_header(handle); 1101 page = virt_to_page(zhdr); 710 page = virt_to_page(zhdr); 1102 page_claimed = test_and_set_bit(PAGE_ << 1103 711 1104 if (test_bit(PAGE_HEADLESS, &page->pr 712 if (test_bit(PAGE_HEADLESS, &page->private)) { 1105 /* if a headless page is unde !! 713 /* HEADLESS page stored */ 1106 * NB: we use test_and_set_bi !! 714 bud = HEADLESS; 1107 * has not been set before, w !! 715 } else { 1108 * immediately so we don't ca !! 716 z3fold_page_lock(zhdr); 1109 */ !! 717 bud = handle_to_buddy(handle); 1110 if (!page_claimed) { !! 718 1111 put_z3fold_header(zhd !! 719 switch (bud) { 1112 free_z3fold_page(page !! 720 case FIRST: 1113 atomic64_dec(&pool->p !! 721 zhdr->first_chunks = 0; >> 722 break; >> 723 case MIDDLE: >> 724 zhdr->middle_chunks = 0; >> 725 zhdr->start_middle = 0; >> 726 break; >> 727 case LAST: >> 728 zhdr->last_chunks = 0; >> 729 break; >> 730 default: >> 731 pr_err("%s: unknown bud %d\n", __func__, bud); >> 732 WARN_ON(1); >> 733 z3fold_page_unlock(zhdr); >> 734 return; 1114 } 735 } 1115 return; << 1116 } 736 } 1117 737 1118 /* Non-headless case */ !! 738 if (bud == HEADLESS) { 1119 bud = handle_to_buddy(handle); !! 739 spin_lock(&pool->lock); 1120 !! 740 list_del(&page->lru); 1121 switch (bud) { !! 741 spin_unlock(&pool->lock); 1122 case FIRST: !! 742 free_z3fold_page(page); 1123 zhdr->first_chunks = 0; !! 743 atomic64_dec(&pool->pages_nr); 1124 break; << 1125 case MIDDLE: << 1126 zhdr->middle_chunks = 0; << 1127 break; << 1128 case LAST: << 1129 zhdr->last_chunks = 0; << 1130 break; << 1131 default: << 1132 pr_err("%s: unknown bud %d\n" << 1133 WARN_ON(1); << 1134 put_z3fold_header(zhdr); << 1135 return; 744 return; 1136 } 745 } 1137 746 1138 if (!page_claimed) !! 747 if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { 1139 free_handle(handle, zhdr); !! 748 atomic64_dec(&pool->pages_nr); 1140 if (put_z3fold_locked_list(zhdr)) << 1141 return; << 1142 if (page_claimed) { << 1143 /* the page has not been clai << 1144 put_z3fold_header(zhdr); << 1145 return; 749 return; 1146 } 750 } 1147 if (test_and_set_bit(NEEDS_COMPACTING 751 if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 1148 clear_bit(PAGE_CLAIMED, &page !! 752 z3fold_page_unlock(zhdr); 1149 put_z3fold_header(zhdr); << 1150 return; 753 return; 1151 } 754 } 1152 if (zhdr->cpu < 0 || !cpu_online(zhdr 755 if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { >> 756 spin_lock(&pool->lock); >> 757 list_del_init(&zhdr->buddy); >> 758 spin_unlock(&pool->lock); 1153 zhdr->cpu = -1; 759 zhdr->cpu = -1; 1154 kref_get(&zhdr->refcount); 760 kref_get(&zhdr->refcount); 1155 clear_bit(PAGE_CLAIMED, &page << 1156 do_compact_page(zhdr, true); 761 do_compact_page(zhdr, true); 1157 return; 762 return; 1158 } 763 } 1159 kref_get(&zhdr->refcount); 764 kref_get(&zhdr->refcount); 1160 clear_bit(PAGE_CLAIMED, &page->privat << 1161 queue_work_on(zhdr->cpu, pool->compac 765 queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); 1162 put_z3fold_header(zhdr); !! 766 z3fold_page_unlock(zhdr); >> 767 } >> 768 >> 769 /** >> 770 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it >> 771 * @pool: pool from which a page will attempt to be evicted >> 772 * @retires: number of pages on the LRU list for which eviction will >> 773 * be attempted before failing >> 774 * >> 775 * z3fold reclaim is different from normal system reclaim in that it is done >> 776 * from the bottom, up. This is because only the bottom layer, z3fold, has >> 777 * information on how the allocations are organized within each z3fold page. >> 778 * This has the potential to create interesting locking situations between >> 779 * z3fold and the user, however. >> 780 * >> 781 * To avoid these, this is how z3fold_reclaim_page() should be called: >> 782 >> 783 * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). >> 784 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and >> 785 * call the user-defined eviction handler with the pool and handle as >> 786 * arguments. >> 787 * >> 788 * If the handle can not be evicted, the eviction handler should return >> 789 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the >> 790 * appropriate list and try the next z3fold page on the LRU up to >> 791 * a user defined number of retries. >> 792 * >> 793 * If the handle is successfully evicted, the eviction handler should >> 794 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free() >> 795 * contains logic to delay freeing the page if the page is under reclaim, >> 796 * as indicated by the setting of the PG_reclaim flag on the underlying page. >> 797 * >> 798 * If all buddies in the z3fold page are successfully evicted, then the >> 799 * z3fold page can be freed. >> 800 * >> 801 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are >> 802 * no pages to evict or an eviction handler is not registered, -EAGAIN if >> 803 * the retry limit was hit. >> 804 */ >> 805 static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) >> 806 { >> 807 int i, ret = 0; >> 808 struct z3fold_header *zhdr = NULL; >> 809 struct page *page = NULL; >> 810 struct list_head *pos; >> 811 unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; >> 812 >> 813 spin_lock(&pool->lock); >> 814 if (!pool->ops || !pool->ops->evict || retries == 0) { >> 815 spin_unlock(&pool->lock); >> 816 return -EINVAL; >> 817 } >> 818 for (i = 0; i < retries; i++) { >> 819 if (list_empty(&pool->lru)) { >> 820 spin_unlock(&pool->lock); >> 821 return -EINVAL; >> 822 } >> 823 list_for_each_prev(pos, &pool->lru) { >> 824 page = list_entry(pos, struct page, lru); >> 825 if (test_bit(PAGE_HEADLESS, &page->private)) >> 826 /* candidate found */ >> 827 break; >> 828 >> 829 zhdr = page_address(page); >> 830 if (!z3fold_page_trylock(zhdr)) >> 831 continue; /* can't evict at this point */ >> 832 kref_get(&zhdr->refcount); >> 833 list_del_init(&zhdr->buddy); >> 834 zhdr->cpu = -1; >> 835 } >> 836 >> 837 list_del_init(&page->lru); >> 838 spin_unlock(&pool->lock); >> 839 >> 840 if (!test_bit(PAGE_HEADLESS, &page->private)) { >> 841 /* >> 842 * We need encode the handles before unlocking, since >> 843 * we can race with free that will set >> 844 * (first|last)_chunks to 0 >> 845 */ >> 846 first_handle = 0; >> 847 last_handle = 0; >> 848 middle_handle = 0; >> 849 if (zhdr->first_chunks) >> 850 first_handle = encode_handle(zhdr, FIRST); >> 851 if (zhdr->middle_chunks) >> 852 middle_handle = encode_handle(zhdr, MIDDLE); >> 853 if (zhdr->last_chunks) >> 854 last_handle = encode_handle(zhdr, LAST); >> 855 /* >> 856 * it's safe to unlock here because we hold a >> 857 * reference to this page >> 858 */ >> 859 z3fold_page_unlock(zhdr); >> 860 } else { >> 861 first_handle = encode_handle(zhdr, HEADLESS); >> 862 last_handle = middle_handle = 0; >> 863 } >> 864 >> 865 /* Issue the eviction callback(s) */ >> 866 if (middle_handle) { >> 867 ret = pool->ops->evict(pool, middle_handle); >> 868 if (ret) >> 869 goto next; >> 870 } >> 871 if (first_handle) { >> 872 ret = pool->ops->evict(pool, first_handle); >> 873 if (ret) >> 874 goto next; >> 875 } >> 876 if (last_handle) { >> 877 ret = pool->ops->evict(pool, last_handle); >> 878 if (ret) >> 879 goto next; >> 880 } >> 881 next: >> 882 spin_lock(&pool->lock); >> 883 if (test_bit(PAGE_HEADLESS, &page->private)) { >> 884 if (ret == 0) { >> 885 spin_unlock(&pool->lock); >> 886 free_z3fold_page(page); >> 887 return 0; >> 888 } >> 889 } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { >> 890 atomic64_dec(&pool->pages_nr); >> 891 spin_unlock(&pool->lock); >> 892 return 0; >> 893 } >> 894 >> 895 /* >> 896 * Add to the beginning of LRU. >> 897 * Pool lock has to be kept here to ensure the page has >> 898 * not already been released >> 899 */ >> 900 list_add(&page->lru, &pool->lru); >> 901 } >> 902 spin_unlock(&pool->lock); >> 903 return -EAGAIN; 1163 } 904 } 1164 905 1165 /** 906 /** 1166 * z3fold_map() - maps the allocation associa 907 * z3fold_map() - maps the allocation associated with the given handle 1167 * @pool: pool in which the allocation 908 * @pool: pool in which the allocation resides 1168 * @handle: handle associated with the al 909 * @handle: handle associated with the allocation to be mapped 1169 * 910 * 1170 * Extracts the buddy number from handle and 911 * Extracts the buddy number from handle and constructs the pointer to the 1171 * correct starting chunk within the page. 912 * correct starting chunk within the page. 1172 * 913 * 1173 * Returns: a pointer to the mapped allocatio 914 * Returns: a pointer to the mapped allocation 1174 */ 915 */ 1175 static void *z3fold_map(struct z3fold_pool *p 916 static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 1176 { 917 { 1177 struct z3fold_header *zhdr; 918 struct z3fold_header *zhdr; 1178 struct page *page; 919 struct page *page; 1179 void *addr; 920 void *addr; 1180 enum buddy buddy; 921 enum buddy buddy; 1181 922 1182 zhdr = get_z3fold_header(handle); !! 923 zhdr = handle_to_z3fold_header(handle); 1183 addr = zhdr; 924 addr = zhdr; 1184 page = virt_to_page(zhdr); 925 page = virt_to_page(zhdr); 1185 926 1186 if (test_bit(PAGE_HEADLESS, &page->pr 927 if (test_bit(PAGE_HEADLESS, &page->private)) 1187 goto out; 928 goto out; 1188 929 >> 930 z3fold_page_lock(zhdr); 1189 buddy = handle_to_buddy(handle); 931 buddy = handle_to_buddy(handle); 1190 switch (buddy) { 932 switch (buddy) { 1191 case FIRST: 933 case FIRST: 1192 addr += ZHDR_SIZE_ALIGNED; 934 addr += ZHDR_SIZE_ALIGNED; 1193 break; 935 break; 1194 case MIDDLE: 936 case MIDDLE: 1195 addr += zhdr->start_middle << 937 addr += zhdr->start_middle << CHUNK_SHIFT; 1196 set_bit(MIDDLE_CHUNK_MAPPED, 938 set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 1197 break; 939 break; 1198 case LAST: 940 case LAST: 1199 addr += PAGE_SIZE - (handle_t !! 941 addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); 1200 break; 942 break; 1201 default: 943 default: 1202 pr_err("unknown buddy id %d\n 944 pr_err("unknown buddy id %d\n", buddy); 1203 WARN_ON(1); 945 WARN_ON(1); 1204 addr = NULL; 946 addr = NULL; 1205 break; 947 break; 1206 } 948 } 1207 949 1208 if (addr) !! 950 z3fold_page_unlock(zhdr); 1209 zhdr->mapped_count++; << 1210 out: 951 out: 1211 put_z3fold_header(zhdr); << 1212 return addr; 952 return addr; 1213 } 953 } 1214 954 1215 /** 955 /** 1216 * z3fold_unmap() - unmaps the allocation ass 956 * z3fold_unmap() - unmaps the allocation associated with the given handle 1217 * @pool: pool in which the allocation 957 * @pool: pool in which the allocation resides 1218 * @handle: handle associated with the al 958 * @handle: handle associated with the allocation to be unmapped 1219 */ 959 */ 1220 static void z3fold_unmap(struct z3fold_pool * 960 static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 1221 { 961 { 1222 struct z3fold_header *zhdr; 962 struct z3fold_header *zhdr; 1223 struct page *page; 963 struct page *page; 1224 enum buddy buddy; 964 enum buddy buddy; 1225 965 1226 zhdr = get_z3fold_header(handle); !! 966 zhdr = handle_to_z3fold_header(handle); 1227 page = virt_to_page(zhdr); 967 page = virt_to_page(zhdr); 1228 968 1229 if (test_bit(PAGE_HEADLESS, &page->pr 969 if (test_bit(PAGE_HEADLESS, &page->private)) 1230 return; 970 return; 1231 971 >> 972 z3fold_page_lock(zhdr); 1232 buddy = handle_to_buddy(handle); 973 buddy = handle_to_buddy(handle); 1233 if (buddy == MIDDLE) 974 if (buddy == MIDDLE) 1234 clear_bit(MIDDLE_CHUNK_MAPPED 975 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 1235 zhdr->mapped_count--; !! 976 z3fold_page_unlock(zhdr); 1236 put_z3fold_header(zhdr); << 1237 } 977 } 1238 978 1239 /** 979 /** 1240 * z3fold_get_pool_pages() - gets the z3fold !! 980 * z3fold_get_pool_size() - gets the z3fold pool size in pages 1241 * @pool: pool whose size is being quer 981 * @pool: pool whose size is being queried 1242 * 982 * 1243 * Returns: size in pages of the given pool. 983 * Returns: size in pages of the given pool. 1244 */ 984 */ 1245 static u64 z3fold_get_pool_pages(struct z3fol !! 985 static u64 z3fold_get_pool_size(struct z3fold_pool *pool) 1246 { 986 { 1247 return atomic64_read(&pool->pages_nr) 987 return atomic64_read(&pool->pages_nr); 1248 } 988 } 1249 989 1250 static bool z3fold_page_isolate(struct page * !! 990 /***************** 1251 { !! 991 * zpool 1252 struct z3fold_header *zhdr; !! 992 ****************/ 1253 struct z3fold_pool *pool; << 1254 << 1255 VM_BUG_ON_PAGE(PageIsolated(page), pa << 1256 << 1257 if (test_bit(PAGE_HEADLESS, &page->pr << 1258 return false; << 1259 << 1260 zhdr = page_address(page); << 1261 z3fold_page_lock(zhdr); << 1262 if (test_bit(NEEDS_COMPACTING, &page- << 1263 test_bit(PAGE_STALE, &page->priva << 1264 goto out; << 1265 << 1266 if (zhdr->mapped_count != 0 || zhdr-> << 1267 goto out; << 1268 << 1269 if (test_and_set_bit(PAGE_CLAIMED, &p << 1270 goto out; << 1271 pool = zhdr_to_pool(zhdr); << 1272 spin_lock(&pool->lock); << 1273 if (!list_empty(&zhdr->buddy)) << 1274 list_del_init(&zhdr->buddy); << 1275 spin_unlock(&pool->lock); << 1276 << 1277 kref_get(&zhdr->refcount); << 1278 z3fold_page_unlock(zhdr); << 1279 return true; << 1280 << 1281 out: << 1282 z3fold_page_unlock(zhdr); << 1283 return false; << 1284 } << 1285 << 1286 static int z3fold_page_migrate(struct page *n << 1287 enum migrate_mode mode) << 1288 { << 1289 struct z3fold_header *zhdr, *new_zhdr << 1290 struct z3fold_pool *pool; << 1291 << 1292 VM_BUG_ON_PAGE(!PageIsolated(page), p << 1293 VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED << 1294 VM_BUG_ON_PAGE(!PageLocked(newpage), << 1295 << 1296 zhdr = page_address(page); << 1297 pool = zhdr_to_pool(zhdr); << 1298 << 1299 if (!z3fold_page_trylock(zhdr)) << 1300 return -EAGAIN; << 1301 if (zhdr->mapped_count != 0 || zhdr-> << 1302 clear_bit(PAGE_CLAIMED, &page << 1303 z3fold_page_unlock(zhdr); << 1304 return -EBUSY; << 1305 } << 1306 if (work_pending(&zhdr->work)) { << 1307 z3fold_page_unlock(zhdr); << 1308 return -EAGAIN; << 1309 } << 1310 new_zhdr = page_address(newpage); << 1311 memcpy(new_zhdr, zhdr, PAGE_SIZE); << 1312 newpage->private = page->private; << 1313 set_bit(PAGE_MIGRATED, &page->private << 1314 z3fold_page_unlock(zhdr); << 1315 spin_lock_init(&new_zhdr->page_lock); << 1316 INIT_WORK(&new_zhdr->work, compact_pa << 1317 /* << 1318 * z3fold_page_isolate() ensures that << 1319 * so we only have to reinitialize it << 1320 */ << 1321 INIT_LIST_HEAD(&new_zhdr->buddy); << 1322 __ClearPageMovable(page); << 1323 << 1324 get_page(newpage); << 1325 z3fold_page_lock(new_zhdr); << 1326 if (new_zhdr->first_chunks) << 1327 encode_handle(new_zhdr, FIRST << 1328 if (new_zhdr->last_chunks) << 1329 encode_handle(new_zhdr, LAST) << 1330 if (new_zhdr->middle_chunks) << 1331 encode_handle(new_zhdr, MIDDL << 1332 set_bit(NEEDS_COMPACTING, &newpage->p << 1333 new_zhdr->cpu = smp_processor_id(); << 1334 __SetPageMovable(newpage, &z3fold_mop << 1335 z3fold_page_unlock(new_zhdr); << 1336 << 1337 queue_work_on(new_zhdr->cpu, pool->co << 1338 << 1339 /* PAGE_CLAIMED and PAGE_MIGRATED are << 1340 page->private = 0; << 1341 put_page(page); << 1342 return 0; << 1343 } << 1344 993 1345 static void z3fold_page_putback(struct page * !! 994 static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle) 1346 { 995 { 1347 struct z3fold_header *zhdr; !! 996 if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) 1348 struct z3fold_pool *pool; !! 997 return pool->zpool_ops->evict(pool->zpool, handle); 1349 !! 998 else 1350 zhdr = page_address(page); !! 999 return -ENOENT; 1351 pool = zhdr_to_pool(zhdr); << 1352 << 1353 z3fold_page_lock(zhdr); << 1354 if (!list_empty(&zhdr->buddy)) << 1355 list_del_init(&zhdr->buddy); << 1356 INIT_LIST_HEAD(&page->lru); << 1357 if (put_z3fold_locked(zhdr)) << 1358 return; << 1359 if (list_empty(&zhdr->buddy)) << 1360 add_to_unbuddied(pool, zhdr); << 1361 clear_bit(PAGE_CLAIMED, &page->privat << 1362 z3fold_page_unlock(zhdr); << 1363 } 1000 } 1364 1001 1365 static const struct movable_operations z3fold !! 1002 static const struct z3fold_ops z3fold_zpool_ops = { 1366 .isolate_page = z3fold_page_isolate, !! 1003 .evict = z3fold_zpool_evict 1367 .migrate_page = z3fold_page_migrate, << 1368 .putback_page = z3fold_page_putback, << 1369 }; 1004 }; 1370 1005 1371 /***************** !! 1006 static void *z3fold_zpool_create(const char *name, gfp_t gfp, 1372 * zpool !! 1007 const struct zpool_ops *zpool_ops, 1373 ****************/ !! 1008 struct zpool *zpool) 1374 << 1375 static void *z3fold_zpool_create(const char * << 1376 { 1009 { 1377 return z3fold_create_pool(name, gfp); !! 1010 struct z3fold_pool *pool; >> 1011 >> 1012 pool = z3fold_create_pool(name, gfp, >> 1013 zpool_ops ? &z3fold_zpool_ops : NULL); >> 1014 if (pool) { >> 1015 pool->zpool = zpool; >> 1016 pool->zpool_ops = zpool_ops; >> 1017 } >> 1018 return pool; 1378 } 1019 } 1379 1020 1380 static void z3fold_zpool_destroy(void *pool) 1021 static void z3fold_zpool_destroy(void *pool) 1381 { 1022 { 1382 z3fold_destroy_pool(pool); 1023 z3fold_destroy_pool(pool); 1383 } 1024 } 1384 1025 1385 static int z3fold_zpool_malloc(void *pool, si 1026 static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, 1386 unsigned long *handle 1027 unsigned long *handle) 1387 { 1028 { 1388 return z3fold_alloc(pool, size, gfp, 1029 return z3fold_alloc(pool, size, gfp, handle); 1389 } 1030 } 1390 static void z3fold_zpool_free(void *pool, uns 1031 static void z3fold_zpool_free(void *pool, unsigned long handle) 1391 { 1032 { 1392 z3fold_free(pool, handle); 1033 z3fold_free(pool, handle); 1393 } 1034 } 1394 1035 >> 1036 static int z3fold_zpool_shrink(void *pool, unsigned int pages, >> 1037 unsigned int *reclaimed) >> 1038 { >> 1039 unsigned int total = 0; >> 1040 int ret = -EINVAL; >> 1041 >> 1042 while (total < pages) { >> 1043 ret = z3fold_reclaim_page(pool, 8); >> 1044 if (ret < 0) >> 1045 break; >> 1046 total++; >> 1047 } >> 1048 >> 1049 if (reclaimed) >> 1050 *reclaimed = total; >> 1051 >> 1052 return ret; >> 1053 } >> 1054 1395 static void *z3fold_zpool_map(void *pool, uns 1055 static void *z3fold_zpool_map(void *pool, unsigned long handle, 1396 enum zpool_mapmode mm 1056 enum zpool_mapmode mm) 1397 { 1057 { 1398 return z3fold_map(pool, handle); 1058 return z3fold_map(pool, handle); 1399 } 1059 } 1400 static void z3fold_zpool_unmap(void *pool, un 1060 static void z3fold_zpool_unmap(void *pool, unsigned long handle) 1401 { 1061 { 1402 z3fold_unmap(pool, handle); 1062 z3fold_unmap(pool, handle); 1403 } 1063 } 1404 1064 1405 static u64 z3fold_zpool_total_pages(void *poo !! 1065 static u64 z3fold_zpool_total_size(void *pool) 1406 { 1066 { 1407 return z3fold_get_pool_pages(pool); !! 1067 return z3fold_get_pool_size(pool) * PAGE_SIZE; 1408 } 1068 } 1409 1069 1410 static struct zpool_driver z3fold_zpool_drive 1070 static struct zpool_driver z3fold_zpool_driver = { 1411 .type = "z3fold", 1071 .type = "z3fold", 1412 .sleep_mapped = true, << 1413 .owner = THIS_MODULE, 1072 .owner = THIS_MODULE, 1414 .create = z3fold_zpool_create, 1073 .create = z3fold_zpool_create, 1415 .destroy = z3fold_zpool_destroy, 1074 .destroy = z3fold_zpool_destroy, 1416 .malloc = z3fold_zpool_malloc, 1075 .malloc = z3fold_zpool_malloc, 1417 .free = z3fold_zpool_free, 1076 .free = z3fold_zpool_free, >> 1077 .shrink = z3fold_zpool_shrink, 1418 .map = z3fold_zpool_map, 1078 .map = z3fold_zpool_map, 1419 .unmap = z3fold_zpool_unmap, 1079 .unmap = z3fold_zpool_unmap, 1420 .total_pages = z3fold_zpool_total_pa !! 1080 .total_size = z3fold_zpool_total_size, 1421 }; 1081 }; 1422 1082 1423 MODULE_ALIAS("zpool-z3fold"); 1083 MODULE_ALIAS("zpool-z3fold"); 1424 1084 1425 static int __init init_z3fold(void) 1085 static int __init init_z3fold(void) 1426 { 1086 { 1427 /* !! 1087 /* Make sure the z3fold header is not larger than the page size */ 1428 * Make sure the z3fold header is not !! 1088 BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE); 1429 * there has remaining spaces for its << 1430 */ << 1431 BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE << 1432 zpool_register_driver(&z3fold_zpool_d 1089 zpool_register_driver(&z3fold_zpool_driver); 1433 1090 1434 return 0; 1091 return 0; 1435 } 1092 } 1436 1093 1437 static void __exit exit_z3fold(void) 1094 static void __exit exit_z3fold(void) 1438 { 1095 { 1439 zpool_unregister_driver(&z3fold_zpool 1096 zpool_unregister_driver(&z3fold_zpool_driver); 1440 } 1097 } 1441 1098 1442 module_init(init_z3fold); 1099 module_init(init_z3fold); 1443 module_exit(exit_z3fold); 1100 module_exit(exit_z3fold); 1444 1101 1445 MODULE_LICENSE("GPL"); 1102 MODULE_LICENSE("GPL"); 1446 MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail. 1103 MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>"); 1447 MODULE_DESCRIPTION("3-Fold Allocator for Comp 1104 MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages"); 1448 1105
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.