1 // SPDX-License-Identifier: GPL-2.0-only <<
2 /* 1 /*
3 * z3fold.c 2 * z3fold.c
4 * 3 *
5 * Author: Vitaly Wool <vitaly.wool@konsulko.c 4 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
6 * Copyright (C) 2016, Sony Mobile Communicati 5 * Copyright (C) 2016, Sony Mobile Communications Inc.
7 * 6 *
8 * This implementation is based on zbud writte 7 * This implementation is based on zbud written by Seth Jennings.
9 * 8 *
10 * z3fold is a special purpose allocator for 9 * z3fold is a special purpose allocator for storing compressed pages. It
11 * can store up to three compressed pages per 10 * can store up to three compressed pages per page which improves the
12 * compression ratio of zbud while retaining i 11 * compression ratio of zbud while retaining its main concepts (e.g. always
13 * storing an integral number of objects per p 12 * storing an integral number of objects per page) and simplicity.
14 * It still has simple and deterministic recla 13 * It still has simple and deterministic reclaim properties that make it
15 * preferable to a higher density approach (wi 14 * preferable to a higher density approach (with no requirement on integral
16 * number of objects per page) when reclaim is 15 * number of objects per page) when reclaim is used.
17 * 16 *
18 * As in zbud, pages are divided into "chunks" 17 * As in zbud, pages are divided into "chunks". The size of the chunks is
19 * fixed at compile time and is determined by 18 * fixed at compile time and is determined by NCHUNKS_ORDER below.
20 * 19 *
21 * z3fold doesn't export any API and is meant 20 * z3fold doesn't export any API and is meant to be used via zpool API.
22 */ 21 */
23 22
24 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25 24
26 #include <linux/atomic.h> 25 #include <linux/atomic.h>
27 #include <linux/sched.h> 26 #include <linux/sched.h>
28 #include <linux/cpumask.h> <<
29 #include <linux/list.h> 27 #include <linux/list.h>
30 #include <linux/mm.h> 28 #include <linux/mm.h>
31 #include <linux/module.h> 29 #include <linux/module.h>
32 #include <linux/page-flags.h> <<
33 #include <linux/migrate.h> <<
34 #include <linux/node.h> <<
35 #include <linux/compaction.h> <<
36 #include <linux/percpu.h> 30 #include <linux/percpu.h>
37 #include <linux/preempt.h> 31 #include <linux/preempt.h>
38 #include <linux/workqueue.h> 32 #include <linux/workqueue.h>
39 #include <linux/slab.h> 33 #include <linux/slab.h>
40 #include <linux/spinlock.h> 34 #include <linux/spinlock.h>
41 #include <linux/zpool.h> 35 #include <linux/zpool.h>
42 #include <linux/kmemleak.h> <<
43 <<
44 /* <<
45 * NCHUNKS_ORDER determines the internal alloc <<
46 * adjusting internal fragmentation. It also <<
47 * freelists maintained in each pool.
NCHUNKS_ << 48 * allocation granularity will be in chunks of << 49 * in the beginning of an allocated page are o << 50 * NCHUNKS will be calculated to 63 (or 62 in << 51 * which shows the max number of free chunks i << 52 * be 63, or 62, respectively, freelists per p << 53 */ << 54 #define NCHUNKS_ORDER 6 << 55 << 56 #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ << 57 #define CHUNK_SIZE (1 << CHUNK_SHIFT) << 58 #define ZHDR_SIZE_ALIGNED round_up(sizeof(stru << 59 #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> << 60 #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SH << 61 #define NCHUNKS (TOTAL_CHUNKS - ZHDR_C << 62 << 63 #define BUDDY_MASK (0x3) << 64 #define BUDDY_SHIFT 2 << 65 #define SLOTS_ALIGN (0x40) << 66 36 67 /***************** 37 /***************** 68 * Structures 38 * Structures 69 *****************/ 39 *****************/ 70 struct z3fold_pool; 40 struct z3fold_pool; >> 41 struct z3fold_ops { >> 42 int (*evict)(struct z3fold_pool *pool, unsigned long handle); >> 43 }; 71 44 72 enum buddy { 45 enum buddy { 73 HEADLESS = 0, 46 HEADLESS = 0, 74 FIRST, 47 FIRST, 75 MIDDLE, 48 MIDDLE, 76 LAST, 49 LAST, 77 BUDDIES_MAX = LAST !! 50 BUDDIES_MAX 78 }; 51 }; 79 52 80 struct z3fold_buddy_slots { << 81 /* << 82 * we are using BUDDY_MASK in handle_t << 83 * be enough slots to hold all possibl << 84 */ << 85 unsigned long slot[BUDDY_MASK + 1]; << 86 unsigned long pool; /* back link */ << 87 rwlock_t lock; << 88 }; << 89 #define HANDLE_FLAG_MASK (0x03) << 90 << 91 /* 53 /* 92 * struct z3fold_header - z3fold page metadata 54 * struct z3fold_header - z3fold page metadata occupying first chunks of each 93 * z3fold page, except fo 55 * z3fold page, except for HEADLESS pages 94 * @buddy: links the z3fold page 56 * @buddy: links the z3fold page into the relevant list in the 95 * pool 57 * pool 96 * @page_lock: per-page lock 58 * @page_lock: per-page lock 97 * @refcount: reference count for th 59 * @refcount: reference count for the z3fold page 98 * @work: work_struct for page l 60 * @work: work_struct for page layout optimization 99 * @slots: pointer to the structu !! 
61 * @pool: pointer to the pool which this page belongs to 100 * @pool: pointer to the contain << 101 * @cpu: CPU which this page "b 62 * @cpu: CPU which this page "belongs" to 102 * @first_chunks: the size of the first 63 * @first_chunks: the size of the first buddy in chunks, 0 if free 103 * @middle_chunks: the size of the middle 64 * @middle_chunks: the size of the middle buddy in chunks, 0 if free 104 * @last_chunks: the size of the last b 65 * @last_chunks: the size of the last buddy in chunks, 0 if free 105 * @first_num: the starting number (f 66 * @first_num: the starting number (for the first handle) 106 * @mapped_count: the number of objects << 107 */ 67 */ 108 struct z3fold_header { 68 struct z3fold_header { 109 struct list_head buddy; 69 struct list_head buddy; 110 spinlock_t page_lock; 70 spinlock_t page_lock; 111 struct kref refcount; 71 struct kref refcount; 112 struct work_struct work; 72 struct work_struct work; 113 struct z3fold_buddy_slots *slots; << 114 struct z3fold_pool *pool; 73 struct z3fold_pool *pool; 115 short cpu; 74 short cpu; 116 unsigned short first_chunks; 75 unsigned short first_chunks; 117 unsigned short middle_chunks; 76 unsigned short middle_chunks; 118 unsigned short last_chunks; 77 unsigned short last_chunks; 119 unsigned short start_middle; 78 unsigned short start_middle; 120 unsigned short first_num:2; 79 unsigned short first_num:2; 121 unsigned short mapped_count:2; << 122 unsigned short foreign_handles:2; << 123 }; 80 }; 124 81 >> 82 /* >> 83 * NCHUNKS_ORDER determines the internal allocation granularity, effectively >> 84 * adjusting internal fragmentation. It also determines the number of >> 85 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the >> 86 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks >> 87 * in the beginning of an allocated page are occupied by z3fold header, so >> 88 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), >> 89 * which shows the max number of free chunks in z3fold page, also there will >> 90 * be 63, or 62, respectively, freelists per pool. >> 91 */ >> 92 #define NCHUNKS_ORDER 6 >> 93 >> 94 #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) >> 95 #define CHUNK_SIZE (1 << CHUNK_SHIFT) >> 96 #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) >> 97 #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) >> 98 #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) >> 99 #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) >> 100 >> 101 #define BUDDY_MASK (0x3) >> 102 #define BUDDY_SHIFT 2 >> 103 125 /** 104 /** 126 * struct z3fold_pool - stores metadata for ea 105 * struct z3fold_pool - stores metadata for each z3fold pool 127 * @name: pool name 106 * @name: pool name 128 * @lock: protects pool unbuddied lists !! 107 * @lock: protects pool unbuddied/lru lists 129 * @stale_lock: protects pool stale page list 108 * @stale_lock: protects pool stale page list 130 * @unbuddied: per-cpu array of lists trackin 109 * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 131 * buddies; the list each z3fold 110 * buddies; the list each z3fold page is added to depends on 132 * the size of its free region. 111 * the size of its free region. >> 112 * @lru: list tracking the z3fold pages in LRU order by most recently >> 113 * added buddy. 
133 * @stale: list of pages marked for freei 114 * @stale: list of pages marked for freeing 134 * @pages_nr: number of z3fold pages in the 115 * @pages_nr: number of z3fold pages in the pool. 135 * @c_handle: cache for z3fold_buddy_slots a !! 116 * @ops: pointer to a structure of user defined operations specified at >> 117 * pool creation time. 136 * @compact_wq: workqueue for page layout back 118 * @compact_wq: workqueue for page layout background optimization 137 * @release_wq: workqueue for safe page releas 119 * @release_wq: workqueue for safe page release 138 * @work: work_struct for safe page rele 120 * @work: work_struct for safe page release 139 * 121 * 140 * This structure is allocated at pool creatio 122 * This structure is allocated at pool creation time and maintains metadata 141 * pertaining to a particular z3fold pool. 123 * pertaining to a particular z3fold pool. 142 */ 124 */ 143 struct z3fold_pool { 125 struct z3fold_pool { 144 const char *name; 126 const char *name; 145 spinlock_t lock; 127 spinlock_t lock; 146 spinlock_t stale_lock; 128 spinlock_t stale_lock; 147 struct list_head __percpu *unbuddied; !! 129 struct list_head *unbuddied; >> 130 struct list_head lru; 148 struct list_head stale; 131 struct list_head stale; 149 atomic64_t pages_nr; 132 atomic64_t pages_nr; 150 struct kmem_cache *c_handle; !! 133 const struct z3fold_ops *ops; >> 134 struct zpool *zpool; >> 135 const struct zpool_ops *zpool_ops; 151 struct workqueue_struct *compact_wq; 136 struct workqueue_struct *compact_wq; 152 struct workqueue_struct *release_wq; 137 struct workqueue_struct *release_wq; 153 struct work_struct work; 138 struct work_struct work; 154 }; 139 }; 155 140 156 /* 141 /* 157 * Internal z3fold page flags 142 * Internal z3fold page flags 158 */ 143 */ 159 enum z3fold_page_flags { 144 enum z3fold_page_flags { 160 PAGE_HEADLESS = 0, 145 PAGE_HEADLESS = 0, 161 MIDDLE_CHUNK_MAPPED, 146 MIDDLE_CHUNK_MAPPED, 162 NEEDS_COMPACTING, 147 NEEDS_COMPACTING, 163 PAGE_STALE, 148 PAGE_STALE, 164 PAGE_CLAIMED, /* by either reclaim or 149 PAGE_CLAIMED, /* by either reclaim or free */ 165 PAGE_MIGRATED, /* page is migrated and << 166 }; << 167 << 168 /* << 169 * handle flags, go under HANDLE_FLAG_MASK << 170 */ << 171 enum z3fold_handle_flags { << 172 HANDLES_NOFREE = 0, << 173 }; 150 }; 174 151 175 /* << 176 * Forward declarations << 177 */ << 178 static struct z3fold_header *__z3fold_alloc(st << 179 static void compact_page_work(struct work_stru << 180 << 181 /***************** 152 /***************** 182 * Helpers 153 * Helpers 183 *****************/ 154 *****************/ 184 155 185 /* Converts an allocation size in bytes to siz 156 /* Converts an allocation size in bytes to size in z3fold chunks */ 186 static int size_to_chunks(size_t size) 157 static int size_to_chunks(size_t size) 187 { 158 { 188 return (size + CHUNK_SIZE - 1) >> CHUN 159 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 189 } 160 } 190 161 191 #define for_each_unbuddied_list(_iter, _begin) 162 #define for_each_unbuddied_list(_iter, _begin) \ 192 for ((_iter) = (_begin); (_iter) < NCH 163 for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 193 164 194 static inline struct z3fold_buddy_slots *alloc !! 
165 static void compact_page_work(struct work_struct *w); 195 << 196 { << 197 struct z3fold_buddy_slots *slots = kme << 198 << 199 << 200 if (slots) { << 201 /* It will be freed separately << 202 kmemleak_not_leak(slots); << 203 slots->pool = (unsigned long)p << 204 rwlock_init(&slots->lock); << 205 } << 206 << 207 return slots; << 208 } << 209 << 210 static inline struct z3fold_pool *slots_to_poo << 211 { << 212 return (struct z3fold_pool *)(s->pool << 213 } << 214 << 215 static inline struct z3fold_buddy_slots *handl << 216 { << 217 return (struct z3fold_buddy_slots *)(h << 218 } << 219 << 220 /* Lock a z3fold page */ << 221 static inline void z3fold_page_lock(struct z3f << 222 { << 223 spin_lock(&zhdr->page_lock); << 224 } << 225 << 226 /* Try to lock a z3fold page */ << 227 static inline int z3fold_page_trylock(struct z << 228 { << 229 return spin_trylock(&zhdr->page_lock); << 230 } << 231 << 232 /* Unlock a z3fold page */ << 233 static inline void z3fold_page_unlock(struct z << 234 { << 235 spin_unlock(&zhdr->page_lock); << 236 } << 237 << 238 /* return locked z3fold page if it's not headl << 239 static inline struct z3fold_header *get_z3fold << 240 { << 241 struct z3fold_buddy_slots *slots; << 242 struct z3fold_header *zhdr; << 243 int locked = 0; << 244 << 245 if (!(handle & (1 << PAGE_HEADLESS))) << 246 slots = handle_to_slots(handle << 247 do { << 248 unsigned long addr; << 249 << 250 read_lock(&slots->lock << 251 addr = *(unsigned long << 252 zhdr = (struct z3fold_ << 253 locked = z3fold_page_t << 254 read_unlock(&slots->lo << 255 if (locked) { << 256 struct page *p << 257 << 258 if (!test_bit( << 259 break; << 260 z3fold_page_un << 261 } << 262 cpu_relax(); << 263 } while (true); << 264 } else { << 265 zhdr = (struct z3fold_header * << 266 } << 267 << 268 return zhdr; << 269 } << 270 << 271 static inline void put_z3fold_header(struct z3 << 272 { << 273 struct page *page = virt_to_page(zhdr) << 274 << 275 if (!test_bit(PAGE_HEADLESS, &page->pr << 276 z3fold_page_unlock(zhdr); << 277 } << 278 << 279 static inline void free_handle(unsigned long h << 280 { << 281 struct z3fold_buddy_slots *slots; << 282 int i; << 283 bool is_free; << 284 << 285 if (WARN_ON(*(unsigned long *)handle = << 286 return; << 287 << 288 slots = handle_to_slots(handle); << 289 write_lock(&slots->lock); << 290 *(unsigned long *)handle = 0; << 291 << 292 if (test_bit(HANDLES_NOFREE, &slots->p << 293 write_unlock(&slots->lock); << 294 return; /* simple case, nothin << 295 } << 296 << 297 if (zhdr->slots != slots) << 298 zhdr->foreign_handles--; << 299 << 300 is_free = true; << 301 for (i = 0; i <= BUDDY_MASK; i++) { << 302 if (slots->slot[i]) { << 303 is_free = false; << 304 break; << 305 } << 306 } << 307 write_unlock(&slots->lock); << 308 << 309 if (is_free) { << 310 struct z3fold_pool *pool = slo << 311 << 312 if (zhdr->slots == slots) << 313 zhdr->slots = NULL; << 314 kmem_cache_free(pool->c_handle << 315 } << 316 } << 317 166 318 /* Initializes the z3fold header of a newly al 167 /* Initializes the z3fold header of a newly allocated z3fold page */ 319 static struct z3fold_header *init_z3fold_page( !! 168 static struct z3fold_header *init_z3fold_page(struct page *page, 320 struct !! 
169 struct z3fold_pool *pool) 321 { 170 { 322 struct z3fold_header *zhdr = page_addr 171 struct z3fold_header *zhdr = page_address(page); 323 struct z3fold_buddy_slots *slots; << 324 172 >> 173 INIT_LIST_HEAD(&page->lru); 325 clear_bit(PAGE_HEADLESS, &page->privat 174 clear_bit(PAGE_HEADLESS, &page->private); 326 clear_bit(MIDDLE_CHUNK_MAPPED, &page-> 175 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 327 clear_bit(NEEDS_COMPACTING, &page->pri 176 clear_bit(NEEDS_COMPACTING, &page->private); 328 clear_bit(PAGE_STALE, &page->private); 177 clear_bit(PAGE_STALE, &page->private); 329 clear_bit(PAGE_CLAIMED, &page->private 178 clear_bit(PAGE_CLAIMED, &page->private); 330 clear_bit(PAGE_MIGRATED, &page->privat << 331 if (headless) << 332 return zhdr; << 333 << 334 slots = alloc_slots(pool, gfp); << 335 if (!slots) << 336 return NULL; << 337 179 338 memset(zhdr, 0, sizeof(*zhdr)); << 339 spin_lock_init(&zhdr->page_lock); 180 spin_lock_init(&zhdr->page_lock); 340 kref_init(&zhdr->refcount); 181 kref_init(&zhdr->refcount); >> 182 zhdr->first_chunks = 0; >> 183 zhdr->middle_chunks = 0; >> 184 zhdr->last_chunks = 0; >> 185 zhdr->first_num = 0; >> 186 zhdr->start_middle = 0; 341 zhdr->cpu = -1; 187 zhdr->cpu = -1; 342 zhdr->slots = slots; << 343 zhdr->pool = pool; 188 zhdr->pool = pool; 344 INIT_LIST_HEAD(&zhdr->buddy); 189 INIT_LIST_HEAD(&zhdr->buddy); 345 INIT_WORK(&zhdr->work, compact_page_wo 190 INIT_WORK(&zhdr->work, compact_page_work); 346 return zhdr; 191 return zhdr; 347 } 192 } 348 193 349 /* Resets the struct page fields and frees the 194 /* Resets the struct page fields and frees the page */ 350 static void free_z3fold_page(struct page *page !! 195 static void free_z3fold_page(struct page *page) 351 { 196 { 352 if (!headless) { << 353 lock_page(page); << 354 __ClearPageMovable(page); << 355 unlock_page(page); << 356 } << 357 __free_page(page); 197 __free_page(page); 358 } 198 } 359 199 360 /* Helper function to build the index */ !! 200 /* Lock a z3fold page */ 361 static inline int __idx(struct z3fold_header * !! 201 static inline void z3fold_page_lock(struct z3fold_header *zhdr) >> 202 { >> 203 spin_lock(&zhdr->page_lock); >> 204 } >> 205 >> 206 /* Try to lock a z3fold page */ >> 207 static inline int z3fold_page_trylock(struct z3fold_header *zhdr) 362 { 208 { 363 return (bud + zhdr->first_num) & BUDDY !! 209 return spin_trylock(&zhdr->page_lock); >> 210 } >> 211 >> 212 /* Unlock a z3fold page */ >> 213 static inline void z3fold_page_unlock(struct z3fold_header *zhdr) >> 214 { >> 215 spin_unlock(&zhdr->page_lock); 364 } 216 } 365 217 366 /* 218 /* 367 * Encodes the handle of a particular buddy wi !! 219 * Encodes the handle of a particular buddy within a z3fold page 368 * Zhdr->page_lock should be held as this func !! 220 * Pool lock should be held as this function accesses first_num 369 * if bud != HEADLESS. << 370 */ 221 */ 371 static unsigned long __encode_handle(struct z3 !! 222 static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 372 struct z3fold_ << 373 enum buddy bud << 374 { 223 { 375 unsigned long h = (unsigned long)zhdr; !! 224 unsigned long handle; 376 int idx = 0; << 377 225 378 /* !! 226 handle = (unsigned long)zhdr; 379 * For a headless page, its handle is !! 227 if (bud != HEADLESS) { 380 * PAGE_HEADLESS bit set !! 228 handle |= (bud + zhdr->first_num) & BUDDY_MASK; 381 */ !! 229 if (bud == LAST) 382 if (bud == HEADLESS) !! 230 handle |= (zhdr->last_chunks << BUDDY_SHIFT); 383 return h | (1 << PAGE_HEADLESS !! 231 } 384 !! 
232 return handle; 385 /* otherwise, return pointer to encode << 386 idx = __idx(zhdr, bud); << 387 h += idx; << 388 if (bud == LAST) << 389 h |= (zhdr->last_chunks << BUD << 390 << 391 write_lock(&slots->lock); << 392 slots->slot[idx] = h; << 393 write_unlock(&slots->lock); << 394 return (unsigned long)&slots->slot[idx << 395 } 233 } 396 234 397 static unsigned long encode_handle(struct z3fo !! 235 /* Returns the z3fold page where a given handle is stored */ >> 236 static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) 398 { 237 { 399 return __encode_handle(zhdr, zhdr->slo !! 238 return (struct z3fold_header *)(handle & PAGE_MASK); 400 } 239 } 401 240 402 /* only for LAST bud, returns zero otherwise * 241 /* only for LAST bud, returns zero otherwise */ 403 static unsigned short handle_to_chunks(unsigne 242 static unsigned short handle_to_chunks(unsigned long handle) 404 { 243 { 405 struct z3fold_buddy_slots *slots = han !! 244 return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; 406 unsigned long addr; << 407 << 408 read_lock(&slots->lock); << 409 addr = *(unsigned long *)handle; << 410 read_unlock(&slots->lock); << 411 return (addr & ~PAGE_MASK) >> BUDDY_SH << 412 } 245 } 413 246 414 /* 247 /* 415 * (handle & BUDDY_MASK) < zhdr->first_num is 248 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 416 * but that doesn't matter. because the maski 249 * but that doesn't matter. because the masking will result in the 417 * correct buddy number. 250 * correct buddy number. 418 */ 251 */ 419 static enum buddy handle_to_buddy(unsigned lon 252 static enum buddy handle_to_buddy(unsigned long handle) 420 { 253 { 421 struct z3fold_header *zhdr; !! 254 struct z3fold_header *zhdr = handle_to_z3fold_header(handle); 422 struct z3fold_buddy_slots *slots = han !! 255 return (handle - zhdr->first_num) & BUDDY_MASK; 423 unsigned long addr; << 424 << 425 read_lock(&slots->lock); << 426 WARN_ON(handle & (1 << PAGE_HEADLESS)) << 427 addr = *(unsigned long *)handle; << 428 read_unlock(&slots->lock); << 429 zhdr = (struct z3fold_header *)(addr & << 430 return (addr - zhdr->first_num) & BUDD << 431 } << 432 << 433 static inline struct z3fold_pool *zhdr_to_pool << 434 { << 435 return zhdr->pool; << 436 } 256 } 437 257 438 static void __release_z3fold_page(struct z3fol 258 static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 439 { 259 { 440 struct page *page = virt_to_page(zhdr) 260 struct page *page = virt_to_page(zhdr); 441 struct z3fold_pool *pool = zhdr_to_poo !! 261 struct z3fold_pool *pool = zhdr->pool; 442 262 443 WARN_ON(!list_empty(&zhdr->buddy)); 263 WARN_ON(!list_empty(&zhdr->buddy)); 444 set_bit(PAGE_STALE, &page->private); 264 set_bit(PAGE_STALE, &page->private); 445 clear_bit(NEEDS_COMPACTING, &page->pri 265 clear_bit(NEEDS_COMPACTING, &page->private); 446 spin_lock(&pool->lock); 266 spin_lock(&pool->lock); >> 267 if (!list_empty(&page->lru)) >> 268 list_del(&page->lru); 447 spin_unlock(&pool->lock); 269 spin_unlock(&pool->lock); 448 << 449 if (locked) 270 if (locked) 450 z3fold_page_unlock(zhdr); 271 z3fold_page_unlock(zhdr); 451 << 452 spin_lock(&pool->stale_lock); 272 spin_lock(&pool->stale_lock); 453 list_add(&zhdr->buddy, &pool->stale); 273 list_add(&zhdr->buddy, &pool->stale); 454 queue_work(pool->release_wq, &pool->wo 274 queue_work(pool->release_wq, &pool->work); 455 spin_unlock(&pool->stale_lock); 275 spin_unlock(&pool->stale_lock); >> 276 } 456 277 457 atomic64_dec(&pool->pages_nr); !! 
278 static void __attribute__((__unused__)) >> 279 release_z3fold_page(struct kref *ref) >> 280 { >> 281 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, >> 282 refcount); >> 283 __release_z3fold_page(zhdr, false); 458 } 284 } 459 285 460 static void release_z3fold_page_locked(struct 286 static void release_z3fold_page_locked(struct kref *ref) 461 { 287 { 462 struct z3fold_header *zhdr = container 288 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 463 289 refcount); 464 WARN_ON(z3fold_page_trylock(zhdr)); 290 WARN_ON(z3fold_page_trylock(zhdr)); 465 __release_z3fold_page(zhdr, true); 291 __release_z3fold_page(zhdr, true); 466 } 292 } 467 293 468 static void release_z3fold_page_locked_list(st 294 static void release_z3fold_page_locked_list(struct kref *ref) 469 { 295 { 470 struct z3fold_header *zhdr = container 296 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 471 297 refcount); 472 struct z3fold_pool *pool = zhdr_to_poo !! 298 spin_lock(&zhdr->pool->lock); 473 << 474 spin_lock(&pool->lock); << 475 list_del_init(&zhdr->buddy); 299 list_del_init(&zhdr->buddy); 476 spin_unlock(&pool->lock); !! 300 spin_unlock(&zhdr->pool->lock); 477 301 478 WARN_ON(z3fold_page_trylock(zhdr)); 302 WARN_ON(z3fold_page_trylock(zhdr)); 479 __release_z3fold_page(zhdr, true); 303 __release_z3fold_page(zhdr, true); 480 } 304 } 481 305 482 static inline int put_z3fold_locked(struct z3f << 483 { << 484 return kref_put(&zhdr->refcount, relea << 485 } << 486 << 487 static inline int put_z3fold_locked_list(struc << 488 { << 489 return kref_put(&zhdr->refcount, relea << 490 } << 491 << 492 static void free_pages_work(struct work_struct 306 static void free_pages_work(struct work_struct *w) 493 { 307 { 494 struct z3fold_pool *pool = container_o 308 struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 495 309 496 spin_lock(&pool->stale_lock); 310 spin_lock(&pool->stale_lock); 497 while (!list_empty(&pool->stale)) { 311 while (!list_empty(&pool->stale)) { 498 struct z3fold_header *zhdr = l 312 struct z3fold_header *zhdr = list_first_entry(&pool->stale, 499 313 struct z3fold_header, buddy); 500 struct page *page = virt_to_pa 314 struct page *page = virt_to_page(zhdr); 501 315 502 list_del(&zhdr->buddy); 316 list_del(&zhdr->buddy); 503 if (WARN_ON(!test_bit(PAGE_STA 317 if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 504 continue; 318 continue; 505 spin_unlock(&pool->stale_lock) 319 spin_unlock(&pool->stale_lock); 506 cancel_work_sync(&zhdr->work); 320 cancel_work_sync(&zhdr->work); 507 free_z3fold_page(page, false); !! 321 free_z3fold_page(page); 508 cond_resched(); 322 cond_resched(); 509 spin_lock(&pool->stale_lock); 323 spin_lock(&pool->stale_lock); 510 } 324 } 511 spin_unlock(&pool->stale_lock); 325 spin_unlock(&pool->stale_lock); 512 } 326 } 513 327 514 /* 328 /* 515 * Returns the number of free chunks in a z3fo 329 * Returns the number of free chunks in a z3fold page. 516 * NB: can't be used with HEADLESS pages. 330 * NB: can't be used with HEADLESS pages. 517 */ 331 */ 518 static int num_free_chunks(struct z3fold_heade 332 static int num_free_chunks(struct z3fold_header *zhdr) 519 { 333 { 520 int nfree; 334 int nfree; 521 /* 335 /* 522 * If there is a middle object, pick u 336 * If there is a middle object, pick up the bigger free space 523 * either before or after it. Otherwis 337 * either before or after it. 
Otherwise just subtract the number 524 * of chunks occupied by the first and 338 * of chunks occupied by the first and the last objects. 525 */ 339 */ 526 if (zhdr->middle_chunks != 0) { 340 if (zhdr->middle_chunks != 0) { 527 int nfree_before = zhdr->first 341 int nfree_before = zhdr->first_chunks ? 528 0 : zhdr->start_middle 342 0 : zhdr->start_middle - ZHDR_CHUNKS; 529 int nfree_after = zhdr->last_c 343 int nfree_after = zhdr->last_chunks ? 530 0 : TOTAL_CHUNKS - 344 0 : TOTAL_CHUNKS - 531 (zhdr->start_m 345 (zhdr->start_middle + zhdr->middle_chunks); 532 nfree = max(nfree_before, nfre 346 nfree = max(nfree_before, nfree_after); 533 } else 347 } else 534 nfree = NCHUNKS - zhdr->first_ 348 nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 535 return nfree; 349 return nfree; 536 } 350 } 537 351 538 /* Add to the appropriate unbuddied list */ << 539 static inline void add_to_unbuddied(struct z3f << 540 struct z3fold_ << 541 { << 542 if (zhdr->first_chunks == 0 || zhdr->l << 543 zhdr->middle_chunks == << 544 struct list_head *unbuddied; << 545 int freechunks = num_free_chun << 546 << 547 migrate_disable(); << 548 unbuddied = this_cpu_ptr(pool- << 549 spin_lock(&pool->lock); << 550 list_add(&zhdr->buddy, &unbudd << 551 spin_unlock(&pool->lock); << 552 zhdr->cpu = smp_processor_id() << 553 migrate_enable(); << 554 } << 555 } << 556 << 557 static inline enum buddy get_free_buddy(struct << 558 { << 559 enum buddy bud = HEADLESS; << 560 << 561 if (zhdr->middle_chunks) { << 562 if (!zhdr->first_chunks && << 563 chunks <= zhdr->start_midd << 564 bud = FIRST; << 565 else if (!zhdr->last_chunks) << 566 bud = LAST; << 567 } else { << 568 if (!zhdr->first_chunks) << 569 bud = FIRST; << 570 else if (!zhdr->last_chunks) << 571 bud = LAST; << 572 else << 573 bud = MIDDLE; << 574 } << 575 << 576 return bud; << 577 } << 578 << 579 static inline void *mchunk_memmove(struct z3fo 352 static inline void *mchunk_memmove(struct z3fold_header *zhdr, 580 unsigned short 353 unsigned short dst_chunk) 581 { 354 { 582 void *beg = zhdr; 355 void *beg = zhdr; 583 return memmove(beg + (dst_chunk << CHU 356 return memmove(beg + (dst_chunk << CHUNK_SHIFT), 584 beg + (zhdr->start_midd 357 beg + (zhdr->start_middle << CHUNK_SHIFT), 585 zhdr->middle_chunks << 358 zhdr->middle_chunks << CHUNK_SHIFT); 586 } 359 } 587 360 588 static inline bool buddy_single(struct z3fold_ << 589 { << 590 return !((zhdr->first_chunks && zhdr-> << 591 (zhdr->first_chunks && << 592 (zhdr->middle_chunks & << 593 } << 594 << 595 static struct z3fold_header *compact_single_bu << 596 { << 597 struct z3fold_pool *pool = zhdr_to_poo << 598 void *p = zhdr; << 599 unsigned long old_handle = 0; << 600 size_t sz = 0; << 601 struct z3fold_header *new_zhdr = NULL; << 602 int first_idx = __idx(zhdr, FIRST); << 603 int middle_idx = __idx(zhdr, MIDDLE); << 604 int last_idx = __idx(zhdr, LAST); << 605 unsigned short *moved_chunks = NULL; << 606 << 607 /* << 608 * No need to protect slots here -- al << 609 * the page lock is already taken << 610 */ << 611 if (zhdr->first_chunks && zhdr->slots- << 612 p += ZHDR_SIZE_ALIGNED; << 613 sz = zhdr->first_chunks << CHU << 614 old_handle = (unsigned long)&z << 615 moved_chunks = &zhdr->first_ch << 616 } else if (zhdr->middle_chunks && zhdr << 617 p += zhdr->start_middle << CHU << 618 sz = zhdr->middle_chunks << CH << 619 old_handle = (unsigned long)&z << 620 moved_chunks = &zhdr->middle_c << 621 } else if (zhdr->last_chunks && zhdr-> << 622 p += PAGE_SIZE - (zhdr->last_c << 623 sz = zhdr->last_chunks << CHUN << 624 
old_handle = (unsigned long)&z << 625 moved_chunks = &zhdr->last_chu << 626 } << 627 << 628 if (sz > 0) { << 629 enum buddy new_bud = HEADLESS; << 630 short chunks = size_to_chunks( << 631 void *q; << 632 << 633 new_zhdr = __z3fold_alloc(pool << 634 if (!new_zhdr) << 635 return NULL; << 636 << 637 if (WARN_ON(new_zhdr == zhdr)) << 638 goto out_fail; << 639 << 640 new_bud = get_free_buddy(new_z << 641 q = new_zhdr; << 642 switch (new_bud) { << 643 case FIRST: << 644 new_zhdr->first_chunks << 645 q += ZHDR_SIZE_ALIGNED << 646 break; << 647 case MIDDLE: << 648 new_zhdr->middle_chunk << 649 new_zhdr->start_middle << 650 new_zhdr->firs << 651 q += new_zhdr->start_m << 652 break; << 653 case LAST: << 654 new_zhdr->last_chunks << 655 q += PAGE_SIZE - (new_ << 656 break; << 657 default: << 658 goto out_fail; << 659 } << 660 new_zhdr->foreign_handles++; << 661 memcpy(q, p, sz); << 662 write_lock(&zhdr->slots->lock) << 663 *(unsigned long *)old_handle = << 664 __idx(new_zhdr, new_bu << 665 if (new_bud == LAST) << 666 *(unsigned long *)old_ << 667 (new_z << 668 write_unlock(&zhdr->slots->loc << 669 add_to_unbuddied(pool, new_zhd << 670 z3fold_page_unlock(new_zhdr); << 671 << 672 *moved_chunks = 0; << 673 } << 674 << 675 return new_zhdr; << 676 << 677 out_fail: << 678 if (new_zhdr && !put_z3fold_locked(new << 679 add_to_unbuddied(pool, new_zhd << 680 z3fold_page_unlock(new_zhdr); << 681 } << 682 return NULL; << 683 << 684 } << 685 << 686 #define BIG_CHUNK_GAP 3 361 #define BIG_CHUNK_GAP 3 687 /* Has to be called with lock held */ 362 /* Has to be called with lock held */ 688 static int z3fold_compact_page(struct z3fold_h 363 static int z3fold_compact_page(struct z3fold_header *zhdr) 689 { 364 { 690 struct page *page = virt_to_page(zhdr) 365 struct page *page = virt_to_page(zhdr); 691 366 692 if (test_bit(MIDDLE_CHUNK_MAPPED, &pag 367 if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 693 return 0; /* can't move middle 368 return 0; /* can't move middle chunk, it's used */ 694 369 695 if (unlikely(PageIsolated(page))) << 696 return 0; << 697 << 698 if (zhdr->middle_chunks == 0) 370 if (zhdr->middle_chunks == 0) 699 return 0; /* nothing to compac 371 return 0; /* nothing to compact */ 700 372 701 if (zhdr->first_chunks == 0 && zhdr->l 373 if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 702 /* move to the beginning */ 374 /* move to the beginning */ 703 mchunk_memmove(zhdr, ZHDR_CHUN 375 mchunk_memmove(zhdr, ZHDR_CHUNKS); 704 zhdr->first_chunks = zhdr->mid 376 zhdr->first_chunks = zhdr->middle_chunks; 705 zhdr->middle_chunks = 0; 377 zhdr->middle_chunks = 0; 706 zhdr->start_middle = 0; 378 zhdr->start_middle = 0; 707 zhdr->first_num++; 379 zhdr->first_num++; 708 return 1; 380 return 1; 709 } 381 } 710 382 711 /* 383 /* 712 * moving data is expensive, so let's 384 * moving data is expensive, so let's only do that if 713 * there's substantial gain (at least 385 * there's substantial gain (at least BIG_CHUNK_GAP chunks) 714 */ 386 */ 715 if (zhdr->first_chunks != 0 && zhdr->l 387 if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 716 zhdr->start_middle - (zhdr->first_ 388 zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 717 BIG_CHUNK_GAP) { 389 BIG_CHUNK_GAP) { 718 mchunk_memmove(zhdr, zhdr->fir 390 mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 719 zhdr->start_middle = zhdr->fir 391 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 720 return 1; 392 return 1; 721 } else if (zhdr->last_chunks != 0 && z 393 } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 
722 TOTAL_CHUNKS - (zhdr->last_ 394 TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 723 + zhdr 395 + zhdr->middle_chunks) >= 724 BIG_CHUNK_GAP) { 396 BIG_CHUNK_GAP) { 725 unsigned short new_start = TOT 397 unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 726 zhdr->middle_chunks; 398 zhdr->middle_chunks; 727 mchunk_memmove(zhdr, new_start 399 mchunk_memmove(zhdr, new_start); 728 zhdr->start_middle = new_start 400 zhdr->start_middle = new_start; 729 return 1; 401 return 1; 730 } 402 } 731 403 732 return 0; 404 return 0; 733 } 405 } 734 406 735 static void do_compact_page(struct z3fold_head 407 static void do_compact_page(struct z3fold_header *zhdr, bool locked) 736 { 408 { 737 struct z3fold_pool *pool = zhdr_to_poo !! 409 struct z3fold_pool *pool = zhdr->pool; 738 struct page *page; 410 struct page *page; >> 411 struct list_head *unbuddied; >> 412 int fchunks; 739 413 740 page = virt_to_page(zhdr); 414 page = virt_to_page(zhdr); 741 if (locked) 415 if (locked) 742 WARN_ON(z3fold_page_trylock(zh 416 WARN_ON(z3fold_page_trylock(zhdr)); 743 else 417 else 744 z3fold_page_lock(zhdr); 418 z3fold_page_lock(zhdr); 745 if (WARN_ON(!test_and_clear_bit(NEEDS_ 419 if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 746 z3fold_page_unlock(zhdr); 420 z3fold_page_unlock(zhdr); 747 return; 421 return; 748 } 422 } 749 spin_lock(&pool->lock); 423 spin_lock(&pool->lock); 750 list_del_init(&zhdr->buddy); 424 list_del_init(&zhdr->buddy); 751 spin_unlock(&pool->lock); 425 spin_unlock(&pool->lock); 752 426 753 if (put_z3fold_locked(zhdr)) !! 427 if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { 754 return; !! 428 atomic64_dec(&pool->pages_nr); 755 << 756 if (test_bit(PAGE_STALE, &page->privat << 757 test_and_set_bit(PAGE_CLAIMED, &pa << 758 z3fold_page_unlock(zhdr); << 759 return; << 760 } << 761 << 762 if (!zhdr->foreign_handles && buddy_si << 763 zhdr->mapped_count == 0 && compact << 764 if (!put_z3fold_locked(zhdr)) << 765 clear_bit(PAGE_CLAIMED << 766 z3fold_page_unlock(zhd << 767 } << 768 return; 429 return; 769 } 430 } 770 431 771 z3fold_compact_page(zhdr); 432 z3fold_compact_page(zhdr); 772 add_to_unbuddied(pool, zhdr); !! 433 unbuddied = get_cpu_ptr(pool->unbuddied); 773 clear_bit(PAGE_CLAIMED, &page->private !! 
434 fchunks = num_free_chunks(zhdr); >> 435 if (fchunks < NCHUNKS && >> 436 (!zhdr->first_chunks || !zhdr->middle_chunks || >> 437 !zhdr->last_chunks)) { >> 438 /* the page's not completely free and it's unbuddied */ >> 439 spin_lock(&pool->lock); >> 440 list_add(&zhdr->buddy, &unbuddied[fchunks]); >> 441 spin_unlock(&pool->lock); >> 442 zhdr->cpu = smp_processor_id(); >> 443 } >> 444 put_cpu_ptr(pool->unbuddied); 774 z3fold_page_unlock(zhdr); 445 z3fold_page_unlock(zhdr); 775 } 446 } 776 447 777 static void compact_page_work(struct work_stru 448 static void compact_page_work(struct work_struct *w) 778 { 449 { 779 struct z3fold_header *zhdr = container 450 struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 780 451 work); 781 452 782 do_compact_page(zhdr, false); 453 do_compact_page(zhdr, false); 783 } 454 } 784 455 785 /* returns _locked_ z3fold page header or NULL << 786 static inline struct z3fold_header *__z3fold_a << 787 << 788 { << 789 struct z3fold_header *zhdr = NULL; << 790 struct page *page; << 791 struct list_head *unbuddied; << 792 int chunks = size_to_chunks(size), i; << 793 << 794 lookup: << 795 migrate_disable(); << 796 /* First, try to find an unbuddied z3f << 797 unbuddied = this_cpu_ptr(pool->unbuddi << 798 for_each_unbuddied_list(i, chunks) { << 799 struct list_head *l = &unbuddi << 800 << 801 zhdr = list_first_entry_or_nul << 802 struct << 803 << 804 if (!zhdr) << 805 continue; << 806 << 807 /* Re-check under lock. */ << 808 spin_lock(&pool->lock); << 809 if (unlikely(zhdr != list_firs << 810 << 811 !z3fold_page_trylock(zhdr) << 812 spin_unlock(&pool->loc << 813 zhdr = NULL; << 814 migrate_enable(); << 815 if (can_sleep) << 816 cond_resched() << 817 goto lookup; << 818 } << 819 list_del_init(&zhdr->buddy); << 820 zhdr->cpu = -1; << 821 spin_unlock(&pool->lock); << 822 << 823 page = virt_to_page(zhdr); << 824 if (test_bit(NEEDS_COMPACTING, << 825 test_bit(PAGE_CLAIMED, &pa << 826 z3fold_page_unlock(zhd << 827 zhdr = NULL; << 828 migrate_enable(); << 829 if (can_sleep) << 830 cond_resched() << 831 goto lookup; << 832 } << 833 << 834 /* << 835 * this page could not be remo << 836 * list while pool lock was he << 837 * page lock so kref_put could << 838 * we got here, so it's safe t << 839 */ << 840 kref_get(&zhdr->refcount); << 841 break; << 842 } << 843 migrate_enable(); << 844 << 845 if (!zhdr) { << 846 int cpu; << 847 << 848 /* look for _exact_ match on o << 849 for_each_online_cpu(cpu) { << 850 struct list_head *l; << 851 << 852 unbuddied = per_cpu_pt << 853 spin_lock(&pool->lock) << 854 l = &unbuddied[chunks] << 855 << 856 zhdr = list_first_entr << 857 << 858 << 859 if (!zhdr || !z3fold_p << 860 spin_unlock(&p << 861 zhdr = NULL; << 862 continue; << 863 } << 864 list_del_init(&zhdr->b << 865 zhdr->cpu = -1; << 866 spin_unlock(&pool->loc << 867 << 868 page = virt_to_page(zh << 869 if (test_bit(NEEDS_COM << 870 test_bit(PAGE_CLAI << 871 z3fold_page_un << 872 zhdr = NULL; << 873 if (can_sleep) << 874 cond_r << 875 continue; << 876 } << 877 kref_get(&zhdr->refcou << 878 break; << 879 } << 880 } << 881 << 882 if (zhdr && !zhdr->slots) { << 883 zhdr->slots = alloc_slots(pool << 884 if (!zhdr->slots) << 885 goto out_fail; << 886 } << 887 return zhdr; << 888 << 889 out_fail: << 890 if (!put_z3fold_locked(zhdr)) { << 891 add_to_unbuddied(pool, zhdr); << 892 z3fold_page_unlock(zhdr); << 893 } << 894 return NULL; << 895 } << 896 456 897 /* 457 /* 898 * API Functions 458 * API Functions 899 */ 459 */ 900 460 901 /** 461 /** 902 * z3fold_create_pool() - create a new 
z3fold 462 * z3fold_create_pool() - create a new z3fold pool 903 * @name: pool name 463 * @name: pool name 904 * @gfp: gfp flags when allocating the 464 * @gfp: gfp flags when allocating the z3fold pool structure >> 465 * @ops: user-defined operations for the z3fold pool 905 * 466 * 906 * Return: pointer to the new z3fold pool or N 467 * Return: pointer to the new z3fold pool or NULL if the metadata allocation 907 * failed. 468 * failed. 908 */ 469 */ 909 static struct z3fold_pool *z3fold_create_pool( !! 470 static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, >> 471 const struct z3fold_ops *ops) 910 { 472 { 911 struct z3fold_pool *pool = NULL; 473 struct z3fold_pool *pool = NULL; 912 int i, cpu; 474 int i, cpu; 913 475 914 pool = kzalloc(sizeof(struct z3fold_po 476 pool = kzalloc(sizeof(struct z3fold_pool), gfp); 915 if (!pool) 477 if (!pool) 916 goto out; 478 goto out; 917 pool->c_handle = kmem_cache_create("z3 << 918 sizeof(struct << 919 SLOTS_ALIGN, 0 << 920 if (!pool->c_handle) << 921 goto out_c; << 922 spin_lock_init(&pool->lock); 479 spin_lock_init(&pool->lock); 923 spin_lock_init(&pool->stale_lock); 480 spin_lock_init(&pool->stale_lock); 924 pool->unbuddied = __alloc_percpu(sizeo !! 481 pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); 925 __ali << 926 if (!pool->unbuddied) 482 if (!pool->unbuddied) 927 goto out_pool; 483 goto out_pool; 928 for_each_possible_cpu(cpu) { 484 for_each_possible_cpu(cpu) { 929 struct list_head *unbuddied = 485 struct list_head *unbuddied = 930 per_cpu_ptr(po 486 per_cpu_ptr(pool->unbuddied, cpu); 931 for_each_unbuddied_list(i, 0) 487 for_each_unbuddied_list(i, 0) 932 INIT_LIST_HEAD(&unbudd 488 INIT_LIST_HEAD(&unbuddied[i]); 933 } 489 } >> 490 INIT_LIST_HEAD(&pool->lru); 934 INIT_LIST_HEAD(&pool->stale); 491 INIT_LIST_HEAD(&pool->stale); 935 atomic64_set(&pool->pages_nr, 0); 492 atomic64_set(&pool->pages_nr, 0); 936 pool->name = name; 493 pool->name = name; 937 pool->compact_wq = create_singlethread 494 pool->compact_wq = create_singlethread_workqueue(pool->name); 938 if (!pool->compact_wq) 495 if (!pool->compact_wq) 939 goto out_unbuddied; 496 goto out_unbuddied; 940 pool->release_wq = create_singlethread 497 pool->release_wq = create_singlethread_workqueue(pool->name); 941 if (!pool->release_wq) 498 if (!pool->release_wq) 942 goto out_wq; 499 goto out_wq; 943 INIT_WORK(&pool->work, free_pages_work 500 INIT_WORK(&pool->work, free_pages_work); >> 501 pool->ops = ops; 944 return pool; 502 return pool; 945 503 946 out_wq: 504 out_wq: 947 destroy_workqueue(pool->compact_wq); 505 destroy_workqueue(pool->compact_wq); 948 out_unbuddied: 506 out_unbuddied: 949 free_percpu(pool->unbuddied); 507 free_percpu(pool->unbuddied); 950 out_pool: 508 out_pool: 951 kmem_cache_destroy(pool->c_handle); << 952 out_c: << 953 kfree(pool); 509 kfree(pool); 954 out: 510 out: 955 return NULL; 511 return NULL; 956 } 512 } 957 513 958 /** 514 /** 959 * z3fold_destroy_pool() - destroys an existin 515 * z3fold_destroy_pool() - destroys an existing z3fold pool 960 * @pool: the z3fold pool to be destroye 516 * @pool: the z3fold pool to be destroyed 961 * 517 * 962 * The pool should be emptied before this func 518 * The pool should be emptied before this function is called. 
963 */ 519 */ 964 static void z3fold_destroy_pool(struct z3fold_ 520 static void z3fold_destroy_pool(struct z3fold_pool *pool) 965 { 521 { 966 kmem_cache_destroy(pool->c_handle); << 967 << 968 /* << 969 * We need to destroy pool->compact_wq << 970 * as any pending work on pool->compac << 971 * queue_work(pool->release_wq, &pool- << 972 * << 973 * There are still outstanding pages u << 974 * so we cannot unregister migration u << 975 */ << 976 << 977 destroy_workqueue(pool->compact_wq); << 978 destroy_workqueue(pool->release_wq); 522 destroy_workqueue(pool->release_wq); 979 free_percpu(pool->unbuddied); !! 523 destroy_workqueue(pool->compact_wq); 980 kfree(pool); 524 kfree(pool); 981 } 525 } 982 526 983 static const struct movable_operations z3fold_ << 984 << 985 /** 527 /** 986 * z3fold_alloc() - allocates a region of a gi 528 * z3fold_alloc() - allocates a region of a given size 987 * @pool: z3fold pool from which to allo 529 * @pool: z3fold pool from which to allocate 988 * @size: size in bytes of the desired a 530 * @size: size in bytes of the desired allocation 989 * @gfp: gfp flags used if the pool nee 531 * @gfp: gfp flags used if the pool needs to grow 990 * @handle: handle of the new allocation 532 * @handle: handle of the new allocation 991 * 533 * 992 * This function will attempt to find a free r 534 * This function will attempt to find a free region in the pool large enough to 993 * satisfy the allocation request. A search o 535 * satisfy the allocation request. A search of the unbuddied lists is 994 * performed first. If no suitable free region 536 * performed first. If no suitable free region is found, then a new page is 995 * allocated and added to the pool to satisfy 537 * allocated and added to the pool to satisfy the request. 996 * 538 * >> 539 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used >> 540 * as z3fold pool pages. >> 541 * 997 * Return: 0 if success and handle is set, oth 542 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or 998 * gfp arguments are invalid or -ENOMEM if the 543 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 999 * a new page. 544 * a new page. 1000 */ 545 */ 1001 static int z3fold_alloc(struct z3fold_pool *p 546 static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, 1002 unsigned long *handle 547 unsigned long *handle) 1003 { 548 { 1004 int chunks = size_to_chunks(size); !! 549 int chunks = 0, i, freechunks; 1005 struct z3fold_header *zhdr = NULL; 550 struct z3fold_header *zhdr = NULL; 1006 struct page *page = NULL; 551 struct page *page = NULL; 1007 enum buddy bud; 552 enum buddy bud; 1008 bool can_sleep = gfpflags_allow_block !! 553 bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM; 1009 554 1010 if (!size || (gfp & __GFP_HIGHMEM)) 555 if (!size || (gfp & __GFP_HIGHMEM)) 1011 return -EINVAL; 556 return -EINVAL; 1012 557 1013 if (size > PAGE_SIZE) 558 if (size > PAGE_SIZE) 1014 return -ENOSPC; 559 return -ENOSPC; 1015 560 1016 if (size > PAGE_SIZE - ZHDR_SIZE_ALIG 561 if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) 1017 bud = HEADLESS; 562 bud = HEADLESS; 1018 else { 563 else { 1019 retry: !! 564 struct list_head *unbuddied; 1020 zhdr = __z3fold_alloc(pool, s !! 565 chunks = size_to_chunks(size); >> 566 >> 567 lookup: >> 568 /* First, try to find an unbuddied z3fold page. 
*/ >> 569 unbuddied = get_cpu_ptr(pool->unbuddied); >> 570 for_each_unbuddied_list(i, chunks) { >> 571 struct list_head *l = &unbuddied[i]; >> 572 >> 573 zhdr = list_first_entry_or_null(READ_ONCE(l), >> 574 struct z3fold_header, buddy); >> 575 >> 576 if (!zhdr) >> 577 continue; >> 578 >> 579 /* Re-check under lock. */ >> 580 spin_lock(&pool->lock); >> 581 l = &unbuddied[i]; >> 582 if (unlikely(zhdr != list_first_entry(READ_ONCE(l), >> 583 struct z3fold_header, buddy)) || >> 584 !z3fold_page_trylock(zhdr)) { >> 585 spin_unlock(&pool->lock); >> 586 put_cpu_ptr(pool->unbuddied); >> 587 goto lookup; >> 588 } >> 589 list_del_init(&zhdr->buddy); >> 590 zhdr->cpu = -1; >> 591 spin_unlock(&pool->lock); >> 592 >> 593 page = virt_to_page(zhdr); >> 594 if (test_bit(NEEDS_COMPACTING, &page->private)) { >> 595 z3fold_page_unlock(zhdr); >> 596 zhdr = NULL; >> 597 put_cpu_ptr(pool->unbuddied); >> 598 if (can_sleep) >> 599 cond_resched(); >> 600 goto lookup; >> 601 } >> 602 >> 603 /* >> 604 * this page could not be removed from its unbuddied >> 605 * list while pool lock was held, and then we've taken >> 606 * page lock so kref_put could not be called before >> 607 * we got here, so it's safe to just call kref_get() >> 608 */ >> 609 kref_get(&zhdr->refcount); >> 610 break; >> 611 } >> 612 put_cpu_ptr(pool->unbuddied); >> 613 1021 if (zhdr) { 614 if (zhdr) { 1022 bud = get_free_buddy( !! 615 if (zhdr->first_chunks == 0) { 1023 if (bud == HEADLESS) !! 616 if (zhdr->middle_chunks != 0 && 1024 if (!put_z3fo !! 617 chunks >= zhdr->start_middle) >> 618 bud = LAST; >> 619 else >> 620 bud = FIRST; >> 621 } else if (zhdr->last_chunks == 0) >> 622 bud = LAST; >> 623 else if (zhdr->middle_chunks == 0) >> 624 bud = MIDDLE; >> 625 else { >> 626 if (kref_put(&zhdr->refcount, >> 627 release_z3fold_page_locked)) >> 628 atomic64_dec(&pool->pages_nr); >> 629 else 1025 z3fol 630 z3fold_page_unlock(zhdr); 1026 pr_err("No fr 631 pr_err("No free chunks in unbuddied\n"); 1027 WARN_ON(1); 632 WARN_ON(1); 1028 goto retry; !! 633 goto lookup; 1029 } 634 } 1030 page = virt_to_page(z << 1031 goto found; 635 goto found; 1032 } 636 } 1033 bud = FIRST; 637 bud = FIRST; 1034 } 638 } 1035 639 1036 page = alloc_page(gfp); !! 640 spin_lock(&pool->stale_lock); >> 641 zhdr = list_first_entry_or_null(&pool->stale, >> 642 struct z3fold_header, buddy); >> 643 /* >> 644 * Before allocating a page, let's see if we can take one from the >> 645 * stale pages list. cancel_work_sync() can sleep so we must make >> 646 * sure it won't be called in case we're in atomic context. 
>> 647 */ >> 648 if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { >> 649 list_del(&zhdr->buddy); >> 650 spin_unlock(&pool->stale_lock); >> 651 if (can_sleep) >> 652 cancel_work_sync(&zhdr->work); >> 653 page = virt_to_page(zhdr); >> 654 } else { >> 655 spin_unlock(&pool->stale_lock); >> 656 page = alloc_page(gfp); >> 657 } >> 658 1037 if (!page) 659 if (!page) 1038 return -ENOMEM; 660 return -ENOMEM; 1039 661 1040 zhdr = init_z3fold_page(page, bud == << 1041 if (!zhdr) { << 1042 __free_page(page); << 1043 return -ENOMEM; << 1044 } << 1045 atomic64_inc(&pool->pages_nr); 662 atomic64_inc(&pool->pages_nr); >> 663 zhdr = init_z3fold_page(page, pool); 1046 664 1047 if (bud == HEADLESS) { 665 if (bud == HEADLESS) { 1048 set_bit(PAGE_HEADLESS, &page- 666 set_bit(PAGE_HEADLESS, &page->private); 1049 goto headless; 667 goto headless; 1050 } 668 } 1051 if (can_sleep) { << 1052 lock_page(page); << 1053 __SetPageMovable(page, &z3fol << 1054 unlock_page(page); << 1055 } else { << 1056 WARN_ON(!trylock_page(page)); << 1057 __SetPageMovable(page, &z3fol << 1058 unlock_page(page); << 1059 } << 1060 z3fold_page_lock(zhdr); 669 z3fold_page_lock(zhdr); 1061 670 1062 found: 671 found: 1063 if (bud == FIRST) 672 if (bud == FIRST) 1064 zhdr->first_chunks = chunks; 673 zhdr->first_chunks = chunks; 1065 else if (bud == LAST) 674 else if (bud == LAST) 1066 zhdr->last_chunks = chunks; 675 zhdr->last_chunks = chunks; 1067 else { 676 else { 1068 zhdr->middle_chunks = chunks; 677 zhdr->middle_chunks = chunks; 1069 zhdr->start_middle = zhdr->fi 678 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 1070 } 679 } 1071 add_to_unbuddied(pool, zhdr); !! 680 >> 681 if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || >> 682 zhdr->middle_chunks == 0) { >> 683 struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied); >> 684 >> 685 /* Add to unbuddied list */ >> 686 freechunks = num_free_chunks(zhdr); >> 687 spin_lock(&pool->lock); >> 688 list_add(&zhdr->buddy, &unbuddied[freechunks]); >> 689 spin_unlock(&pool->lock); >> 690 zhdr->cpu = smp_processor_id(); >> 691 put_cpu_ptr(pool->unbuddied); >> 692 } 1072 693 1073 headless: 694 headless: 1074 spin_lock(&pool->lock); 695 spin_lock(&pool->lock); >> 696 /* Add/move z3fold page to beginning of LRU */ >> 697 if (!list_empty(&page->lru)) >> 698 list_del(&page->lru); >> 699 >> 700 list_add(&page->lru, &pool->lru); >> 701 1075 *handle = encode_handle(zhdr, bud); 702 *handle = encode_handle(zhdr, bud); 1076 spin_unlock(&pool->lock); 703 spin_unlock(&pool->lock); 1077 if (bud != HEADLESS) 704 if (bud != HEADLESS) 1078 z3fold_page_unlock(zhdr); 705 z3fold_page_unlock(zhdr); 1079 706 1080 return 0; 707 return 0; 1081 } 708 } 1082 709 1083 /** 710 /** 1084 * z3fold_free() - frees the allocation assoc 711 * z3fold_free() - frees the allocation associated with the given handle 1085 * @pool: pool in which the allocation 712 * @pool: pool in which the allocation resided 1086 * @handle: handle associated with the al 713 * @handle: handle associated with the allocation returned by z3fold_alloc() 1087 * 714 * 1088 * In the case that the z3fold page in which 715 * In the case that the z3fold page in which the allocation resides is under 1089 * reclaim, as indicated by the PAGE_CLAIMED !! 716 * reclaim, as indicated by the PG_reclaim flag being set, this function 1090 * only sets the first|middle|last_chunks to !! 717 * only sets the first|last_chunks to 0. The page is actually freed 1091 * once all buddies are evicted (see z3fold_r !! 
718 * once both buddies are evicted (see z3fold_reclaim_page() below). 1092 */ 719 */ 1093 static void z3fold_free(struct z3fold_pool *p 720 static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) 1094 { 721 { 1095 struct z3fold_header *zhdr; 722 struct z3fold_header *zhdr; 1096 struct page *page; 723 struct page *page; 1097 enum buddy bud; 724 enum buddy bud; 1098 bool page_claimed; << 1099 725 1100 zhdr = get_z3fold_header(handle); !! 726 zhdr = handle_to_z3fold_header(handle); 1101 page = virt_to_page(zhdr); 727 page = virt_to_page(zhdr); 1102 page_claimed = test_and_set_bit(PAGE_ << 1103 728 1104 if (test_bit(PAGE_HEADLESS, &page->pr 729 if (test_bit(PAGE_HEADLESS, &page->private)) { 1105 /* if a headless page is unde 730 /* if a headless page is under reclaim, just leave. 1106 * NB: we use test_and_set_bi 731 * NB: we use test_and_set_bit for a reason: if the bit 1107 * has not been set before, w 732 * has not been set before, we release this page 1108 * immediately so we don't ca 733 * immediately so we don't care about its value any more. 1109 */ 734 */ 1110 if (!page_claimed) { !! 735 if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { 1111 put_z3fold_header(zhd !! 736 spin_lock(&pool->lock); 1112 free_z3fold_page(page !! 737 list_del(&page->lru); >> 738 spin_unlock(&pool->lock); >> 739 free_z3fold_page(page); 1113 atomic64_dec(&pool->p 740 atomic64_dec(&pool->pages_nr); 1114 } 741 } 1115 return; 742 return; 1116 } 743 } 1117 744 1118 /* Non-headless case */ 745 /* Non-headless case */ >> 746 z3fold_page_lock(zhdr); 1119 bud = handle_to_buddy(handle); 747 bud = handle_to_buddy(handle); 1120 748 1121 switch (bud) { 749 switch (bud) { 1122 case FIRST: 750 case FIRST: 1123 zhdr->first_chunks = 0; 751 zhdr->first_chunks = 0; 1124 break; 752 break; 1125 case MIDDLE: 753 case MIDDLE: 1126 zhdr->middle_chunks = 0; 754 zhdr->middle_chunks = 0; 1127 break; 755 break; 1128 case LAST: 756 case LAST: 1129 zhdr->last_chunks = 0; 757 zhdr->last_chunks = 0; 1130 break; 758 break; 1131 default: 759 default: 1132 pr_err("%s: unknown bud %d\n" 760 pr_err("%s: unknown bud %d\n", __func__, bud); 1133 WARN_ON(1); 761 WARN_ON(1); 1134 put_z3fold_header(zhdr); !! 762 z3fold_page_unlock(zhdr); 1135 return; 763 return; 1136 } 764 } 1137 765 1138 if (!page_claimed) !! 766 if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { 1139 free_handle(handle, zhdr); !! 767 atomic64_dec(&pool->pages_nr); 1140 if (put_z3fold_locked_list(zhdr)) << 1141 return; 768 return; 1142 if (page_claimed) { !! 769 } 1143 /* the page has not been clai !! 770 if (test_bit(PAGE_CLAIMED, &page->private)) { 1144 put_z3fold_header(zhdr); !! 771 z3fold_page_unlock(zhdr); 1145 return; 772 return; 1146 } 773 } 1147 if (test_and_set_bit(NEEDS_COMPACTING 774 if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 1148 clear_bit(PAGE_CLAIMED, &page !! 
775 z3fold_page_unlock(zhdr); 1149 put_z3fold_header(zhdr); << 1150 return; 776 return; 1151 } 777 } 1152 if (zhdr->cpu < 0 || !cpu_online(zhdr 778 if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { >> 779 spin_lock(&pool->lock); >> 780 list_del_init(&zhdr->buddy); >> 781 spin_unlock(&pool->lock); 1153 zhdr->cpu = -1; 782 zhdr->cpu = -1; 1154 kref_get(&zhdr->refcount); 783 kref_get(&zhdr->refcount); 1155 clear_bit(PAGE_CLAIMED, &page << 1156 do_compact_page(zhdr, true); 784 do_compact_page(zhdr, true); 1157 return; 785 return; 1158 } 786 } 1159 kref_get(&zhdr->refcount); 787 kref_get(&zhdr->refcount); 1160 clear_bit(PAGE_CLAIMED, &page->privat << 1161 queue_work_on(zhdr->cpu, pool->compac 788 queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); 1162 put_z3fold_header(zhdr); !! 789 z3fold_page_unlock(zhdr); >> 790 } >> 791 >> 792 /** >> 793 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it >> 794 * @pool: pool from which a page will attempt to be evicted >> 795 * @retires: number of pages on the LRU list for which eviction will >> 796 * be attempted before failing >> 797 * >> 798 * z3fold reclaim is different from normal system reclaim in that it is done >> 799 * from the bottom, up. This is because only the bottom layer, z3fold, has >> 800 * information on how the allocations are organized within each z3fold page. >> 801 * This has the potential to create interesting locking situations between >> 802 * z3fold and the user, however. >> 803 * >> 804 * To avoid these, this is how z3fold_reclaim_page() should be called: >> 805 >> 806 * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). >> 807 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and >> 808 * call the user-defined eviction handler with the pool and handle as >> 809 * arguments. >> 810 * >> 811 * If the handle can not be evicted, the eviction handler should return >> 812 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the >> 813 * appropriate list and try the next z3fold page on the LRU up to >> 814 * a user defined number of retries. >> 815 * >> 816 * If the handle is successfully evicted, the eviction handler should >> 817 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free() >> 818 * contains logic to delay freeing the page if the page is under reclaim, >> 819 * as indicated by the setting of the PG_reclaim flag on the underlying page. >> 820 * >> 821 * If all buddies in the z3fold page are successfully evicted, then the >> 822 * z3fold page can be freed. >> 823 * >> 824 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are >> 825 * no pages to evict or an eviction handler is not registered, -EAGAIN if >> 826 * the retry limit was hit. 
>> 827 */ >> 828 static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) >> 829 { >> 830 int i, ret = 0; >> 831 struct z3fold_header *zhdr = NULL; >> 832 struct page *page = NULL; >> 833 struct list_head *pos; >> 834 unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; >> 835 >> 836 spin_lock(&pool->lock); >> 837 if (!pool->ops || !pool->ops->evict || retries == 0) { >> 838 spin_unlock(&pool->lock); >> 839 return -EINVAL; >> 840 } >> 841 for (i = 0; i < retries; i++) { >> 842 if (list_empty(&pool->lru)) { >> 843 spin_unlock(&pool->lock); >> 844 return -EINVAL; >> 845 } >> 846 list_for_each_prev(pos, &pool->lru) { >> 847 page = list_entry(pos, struct page, lru); >> 848 >> 849 /* this bit could have been set by free, in which case >> 850 * we pass over to the next page in the pool. >> 851 */ >> 852 if (test_and_set_bit(PAGE_CLAIMED, &page->private)) >> 853 continue; >> 854 >> 855 zhdr = page_address(page); >> 856 if (test_bit(PAGE_HEADLESS, &page->private)) >> 857 break; >> 858 >> 859 if (!z3fold_page_trylock(zhdr)) { >> 860 zhdr = NULL; >> 861 continue; /* can't evict at this point */ >> 862 } >> 863 kref_get(&zhdr->refcount); >> 864 list_del_init(&zhdr->buddy); >> 865 zhdr->cpu = -1; >> 866 break; >> 867 } >> 868 >> 869 if (!zhdr) >> 870 break; >> 871 >> 872 list_del_init(&page->lru); >> 873 spin_unlock(&pool->lock); >> 874 >> 875 if (!test_bit(PAGE_HEADLESS, &page->private)) { >> 876 /* >> 877 * We need encode the handles before unlocking, since >> 878 * we can race with free that will set >> 879 * (first|last)_chunks to 0 >> 880 */ >> 881 first_handle = 0; >> 882 last_handle = 0; >> 883 middle_handle = 0; >> 884 if (zhdr->first_chunks) >> 885 first_handle = encode_handle(zhdr, FIRST); >> 886 if (zhdr->middle_chunks) >> 887 middle_handle = encode_handle(zhdr, MIDDLE); >> 888 if (zhdr->last_chunks) >> 889 last_handle = encode_handle(zhdr, LAST); >> 890 /* >> 891 * it's safe to unlock here because we hold a >> 892 * reference to this page >> 893 */ >> 894 z3fold_page_unlock(zhdr); >> 895 } else { >> 896 first_handle = encode_handle(zhdr, HEADLESS); >> 897 last_handle = middle_handle = 0; >> 898 } >> 899 >> 900 /* Issue the eviction callback(s) */ >> 901 if (middle_handle) { >> 902 ret = pool->ops->evict(pool, middle_handle); >> 903 if (ret) >> 904 goto next; >> 905 } >> 906 if (first_handle) { >> 907 ret = pool->ops->evict(pool, first_handle); >> 908 if (ret) >> 909 goto next; >> 910 } >> 911 if (last_handle) { >> 912 ret = pool->ops->evict(pool, last_handle); >> 913 if (ret) >> 914 goto next; >> 915 } >> 916 next: >> 917 if (test_bit(PAGE_HEADLESS, &page->private)) { >> 918 if (ret == 0) { >> 919 free_z3fold_page(page); >> 920 atomic64_dec(&pool->pages_nr); >> 921 return 0; >> 922 } >> 923 spin_lock(&pool->lock); >> 924 list_add(&page->lru, &pool->lru); >> 925 spin_unlock(&pool->lock); >> 926 } else { >> 927 z3fold_page_lock(zhdr); >> 928 clear_bit(PAGE_CLAIMED, &page->private); >> 929 if (kref_put(&zhdr->refcount, >> 930 release_z3fold_page_locked)) { >> 931 atomic64_dec(&pool->pages_nr); >> 932 return 0; >> 933 } >> 934 /* >> 935 * if we are here, the page is still not completely >> 936 * free. 

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	/* older version: handle_to_z3fold_header() plus an explicit page lock/unlock */
	zhdr = get_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
out:
	put_z3fold_header(zhdr);
	return addr;
}

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	put_z3fold_header(zhdr);
}
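
/*
 * Illustrative sketch only: every access to a z3fold allocation must be
 * bracketed by map/unmap.  Mapping pins the object for the duration of the
 * access (a mapped MIDDLE buddy also blocks intra-page moves via
 * MIDDLE_CHUNK_MAPPED), so mappings should be short-lived.  Callers normally
 * reach these static helpers through the zpool wrappers further down.
 */
#if 0	/* example, not compiled */
static void copy_into_z3fold(struct z3fold_pool *pool, unsigned long handle,
			     const void *src, size_t len)
{
	void *dst = z3fold_map(pool, handle);

	memcpy(dst, src, len);
	z3fold_unmap(pool, handle);
}
#endif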

/**
 * z3fold_get_pool_pages() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_pages(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}

static int z3fold_page_migrate(struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		clear_bit(PAGE_CLAIMED, &page->private);
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	set_bit(PAGE_MIGRATED, &page->private);
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	__ClearPageMovable(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	__SetPageMovable(newpage, &z3fold_mops);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	/* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */
	page->private = 0;
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (put_z3fold_locked(zhdr))
		return;
	if (list_empty(&zhdr->buddy))
		add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static const struct movable_operations z3fold_mops = {
	.isolate_page = z3fold_page_isolate,
	.migrate_page = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

/* Older version only: eviction glue used by the legacy reclaim path above. */
static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

/* Older version only: pool creation also took the zpool eviction ops. */
static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	pool = z3fold_create_pool(name, gfp,
				  zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

/* Current version: */
static void *z3fold_zpool_create(const char *name, gfp_t gfp)
{
	return z3fold_create_pool(name, gfp);
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

/* Older version only: the zpool shrink callback driving z3fold_reclaim_page(). */
static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_pages(void *pool)
{
	return z3fold_get_pool_pages(pool);
}

/*
 * Older version only: reported bytes rather than pages, through the older
 * name z3fold_get_pool_size() of the helper defined above.
 */
static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.sleep_mapped = true,
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_pages =	z3fold_zpool_total_pages,
	/* older version also set .shrink = z3fold_zpool_shrink and .total_size = z3fold_zpool_total_size */
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	/*
	 * Make sure the z3fold header is not larger than the page size and
	 * that there is remaining space for its buddy.
	 */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");
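
/*
 * Illustrative sketch only: how a zpool client ends up in the callbacks
 * above.  The exact zpool_create_pool() signature differs between kernel
 * versions (older kernels also passed a struct zpool_ops for eviction);
 * error handling is trimmed for brevity.
 */
#if 0	/* example, not compiled */
static int z3fold_smoke_test(void)
{
	struct zpool *zp = zpool_create_pool("z3fold", "test", GFP_KERNEL);
	unsigned long handle;
	char *buf;

	if (!zp)
		return -ENOMEM;
	if (zpool_malloc(zp, 1024, GFP_KERNEL, &handle) == 0) {
		buf = zpool_map_handle(zp, handle, ZPOOL_MM_WO);
		memset(buf, 0x5a, 1024);
		zpool_unmap_handle(zp, handle);
		zpool_free(zp, handle);
	}
	zpool_destroy_pool(zp);
	return 0;
}
#endif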