/*
 * zcache.c
 *
 * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2010,2011, Nitin Gupta
 *
 * Zcache provides an in-kernel "host implementation" for transcendent memory
 * and, thus indirectly, for cleancache and frontswap.  Zcache includes two
 * page-accessible memory [1] interfaces, both utilizing lzo1x compression:
 * 1) "compression buddies" ("zbud") is used for ephemeral pages
 * 2) xvmalloc is used for persistent pages.
 * Xvmalloc (based on the TLSF allocator) has very low fragmentation
 * so maximizes space efficiency, while zbud allows pairs (and potentially,
 * in the future, more than a pair of) compressed pages to be closely linked
 * so that reclaiming can be done via the kernel's physical-page-oriented
 * "shrinker" interface.
 *
 * [1] For a definition of page-accessible memory (aka PAM), see:
 *     http://marc.info/?l=linux-mm&m=127811271605009
 */

#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/lzo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include "tmem.h"

#include "../zram/xvmalloc.h" /* if built in drivers/staging */

#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))
#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
#endif
#ifdef CONFIG_CLEANCACHE
#include <linux/cleancache.h>
#endif
#ifdef CONFIG_FRONTSWAP
#include <linux/frontswap.h>
#endif

#if 0
/* this is more aggressive but may cause other problems? */
#define ZCACHE_GFP_MASK	(GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
#else
#define ZCACHE_GFP_MASK \
	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
#endif

/**********
 * Compression buddies ("zbud") provides for packing two (or, possibly
 * in the future, more) compressed ephemeral pages into a single "raw"
 * (physical) page and tracking them with data structures so that
 * the raw pages can be easily reclaimed.
 *
 * A zbud page ("zbpg") is an aligned page containing a list_head,
 * a lock, and two "zbud headers".  The remainder of the physical
 * page is divided up into aligned 64-byte "chunks" which contain
 * the compressed data for zero, one, or two zbuds.  Each zbpg
 * resides on: (1) an "unused list" if it has no zbuds; (2) a
 * "buddied" list if it is fully populated with two zbuds; or
 * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks
 * the one unbuddied zbud uses.  The data inside a zbpg cannot be
 * read or written unless the zbpg's lock is held.
 */
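
/*
 * A sketch of the resulting layout, assuming PAGE_SIZE == 4096 and
 * 64-byte chunks:
 *
 *	+------------------------------+ offset 0
 *	| struct zbud_page             |  list_head, lock, buddy[0..1]
 *	+------------------------------+ first chunk boundary past header
 *	| buddy[0] compressed data     |  grows upward from the header
 *	|             ...              |
 *	|         (free chunks)        |
 *	|             ...              |
 *	| buddy[1] compressed data     |  grows downward from the page end
 *	+------------------------------+ offset PAGE_SIZE
 *
 * zbud_data() below computes these two placements.
 */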

#define ZBH_SENTINEL  0x43214321
#define ZBPG_SENTINEL  0xdeadbeef

#define ZBUD_MAX_BUDS 2

struct zbud_hdr {
	uint32_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	uint16_t size; /* compressed size in bytes, zero means unused */
	DECL_SENTINEL
};

struct zbud_page {
	struct list_head bud_list;
	spinlock_t lock;
	struct zbud_hdr buddy[ZBUD_MAX_BUDS];
	DECL_SENTINEL
	/* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */
};

#define CHUNK_SHIFT	6
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define CHUNK_MASK	(~(CHUNK_SIZE-1))
#define NCHUNKS		(((PAGE_SIZE - sizeof(struct zbud_page)) & \
				CHUNK_MASK) >> CHUNK_SHIFT)
#define MAX_CHUNK	(NCHUNKS-1)
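
/*
 * A worked example, as a sketch: with 4KB pages, and assuming the
 * struct zbud_page header occupies between 64 and 128 bytes (the exact
 * size depends on the arch and on whether sentinels are configured),
 * NCHUNKS evaluates to 62 or 63, so each raw page offers roughly 62
 * 64-byte chunks of space for compressed data.
 */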

static struct {
	struct list_head list;
	unsigned count;
} zbud_unbuddied[NCHUNKS];
/* list N contains pages with N chunks USED and NCHUNKS-N unused */
/* element 0 is never used but optimizing that isn't worth it */
static unsigned long zbud_cumul_chunk_counts[NCHUNKS];

struct list_head zbud_buddied_list;
static unsigned long zcache_zbud_buddied_count;

/* protects the buddied list and all unbuddied lists */
static DEFINE_SPINLOCK(zbud_budlists_spinlock);

static LIST_HEAD(zbpg_unused_list);
static unsigned long zcache_zbpg_unused_list_count;

/* protects the unused page list */
static DEFINE_SPINLOCK(zbpg_unused_list_spinlock);

static atomic_t zcache_zbud_curr_raw_pages;
static atomic_t zcache_zbud_curr_zpages;
static unsigned long zcache_zbud_curr_zbytes;
static unsigned long zcache_zbud_cumul_zpages;
static unsigned long zcache_zbud_cumul_zbytes;
static unsigned long zcache_compress_poor;

/* forward references */
static void *zcache_get_free_page(void);
static void zcache_free_page(void *p);

/*
 * zbud helper functions
 */

static inline unsigned zbud_max_buddy_size(void)
{
	return MAX_CHUNK << CHUNK_SHIFT;
}

static inline unsigned zbud_size_to_chunks(unsigned size)
{
	BUG_ON(size == 0 || size > zbud_max_buddy_size());
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
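
/*
 * E.g., a 100-byte compressed page occupies (100 + 63) >> 6 == 2 chunks,
 * while a 64-byte one occupies exactly 1; sizes are always rounded up
 * to whole CHUNK_SIZE units.
 */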

static inline int zbud_budnum(struct zbud_hdr *zh)
{
	unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1);
	struct zbud_page *zbpg = NULL;
	unsigned budnum = -1U;
	int i;

	for (i = 0; i < ZBUD_MAX_BUDS; i++)
		if (offset == offsetof(typeof(*zbpg), buddy[i])) {
			budnum = i;
			break;
		}
	BUG_ON(budnum == -1U);
	return budnum;
}

static char *zbud_data(struct zbud_hdr *zh, unsigned size)
{
	struct zbud_page *zbpg;
	char *p;
	unsigned budnum;

	ASSERT_SENTINEL(zh, ZBH);
	budnum = zbud_budnum(zh);
	BUG_ON(size == 0 || size > zbud_max_buddy_size());
	zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
	ASSERT_SPINLOCK(&zbpg->lock);
	p = (char *)zbpg;
	if (budnum == 0)
		p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
							CHUNK_MASK);
	else if (budnum == 1)
		p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
	return p;
}

/*
 * zbud raw page management
 */

static struct zbud_page *zbud_alloc_raw_page(void)
{
	struct zbud_page *zbpg = NULL;
	struct zbud_hdr *zh0, *zh1;
	bool recycled = false;

	/* if any pages on the zbpg list, use one */
	spin_lock(&zbpg_unused_list_spinlock);
	if (!list_empty(&zbpg_unused_list)) {
		zbpg = list_first_entry(&zbpg_unused_list,
				struct zbud_page, bud_list);
		list_del_init(&zbpg->bud_list);
		zcache_zbpg_unused_list_count--;
		recycled = true;
	}
	spin_unlock(&zbpg_unused_list_spinlock);
	if (zbpg == NULL)
		/* none on zbpg list, try to get a kernel page */
		zbpg = zcache_get_free_page();
	if (likely(zbpg != NULL)) {
		INIT_LIST_HEAD(&zbpg->bud_list);
		zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
		spin_lock_init(&zbpg->lock);
		if (recycled) {
			ASSERT_INVERTED_SENTINEL(zbpg, ZBPG);
			SET_SENTINEL(zbpg, ZBPG);
			BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
			BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
		} else {
			atomic_inc(&zcache_zbud_curr_raw_pages);
			SET_SENTINEL(zbpg, ZBPG);
			zh0->size = 0; zh1->size = 0;
			tmem_oid_set_invalid(&zh0->oid);
			tmem_oid_set_invalid(&zh1->oid);
		}
	}
	return zbpg;
}

static void zbud_free_raw_page(struct zbud_page *zbpg)
{
	struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1];

	ASSERT_SENTINEL(zbpg, ZBPG);
	BUG_ON(!list_empty(&zbpg->bud_list));
	ASSERT_SPINLOCK(&zbpg->lock);
	BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
	BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
	INVERT_SENTINEL(zbpg, ZBPG);
	spin_unlock(&zbpg->lock);
	spin_lock(&zbpg_unused_list_spinlock);
	list_add(&zbpg->bud_list, &zbpg_unused_list);
	zcache_zbpg_unused_list_count++;
	spin_unlock(&zbpg_unused_list_spinlock);
}

/*
 * core zbud handling routines
 */

static unsigned zbud_free(struct zbud_hdr *zh)
{
	unsigned size;

	ASSERT_SENTINEL(zh, ZBH);
	BUG_ON(!tmem_oid_valid(&zh->oid));
	size = zh->size;
	BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
	zh->size = 0;
	tmem_oid_set_invalid(&zh->oid);
	INVERT_SENTINEL(zh, ZBH);
	zcache_zbud_curr_zbytes -= size;
	atomic_dec(&zcache_zbud_curr_zpages);
	return size;
}

static void zbud_free_and_delist(struct zbud_hdr *zh)
{
	unsigned chunks;
	struct zbud_hdr *zh_other;
	unsigned budnum = zbud_budnum(zh), size;
	struct zbud_page *zbpg =
		container_of(zh, struct zbud_page, buddy[budnum]);

	spin_lock(&zbpg->lock);
	if (list_empty(&zbpg->bud_list)) {
		/* ignore zombie page... see zbud_evict_pages() */
		spin_unlock(&zbpg->lock);
		return;
	}
	size = zbud_free(zh);
	ASSERT_SPINLOCK(&zbpg->lock);
	zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
	if (zh_other->size == 0) { /* was unbuddied: unlist and free */
		chunks = zbud_size_to_chunks(size);
		spin_lock(&zbud_budlists_spinlock);
		BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
		list_del_init(&zbpg->bud_list);
		zbud_unbuddied[chunks].count--;
		spin_unlock(&zbud_budlists_spinlock);
		zbud_free_raw_page(zbpg);
	} else { /* was buddied: move remaining buddy to unbuddied list */
		chunks = zbud_size_to_chunks(zh_other->size);
		spin_lock(&zbud_budlists_spinlock);
		list_del_init(&zbpg->bud_list);
		zcache_zbud_buddied_count--;
		list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
		zbud_unbuddied[chunks].count++;
		spin_unlock(&zbud_budlists_spinlock);
		spin_unlock(&zbpg->lock);
	}
}
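
/*
 * Note on locking in zbud_create() below: the unbuddied scan takes
 * zbud_budlists_spinlock first and then trylocks each candidate
 * zbpg->lock, whereas zbud_free_and_delist() above nests the two locks
 * in the opposite order; the trylock (rather than spin_lock) is what
 * keeps that inversion from deadlocking, in addition to skipping pages
 * busy on another cpu.
 */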

static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid,
					uint32_t index, struct page *page,
					void *cdata, unsigned size)
{
	struct zbud_hdr *zh0, *zh1, *zh = NULL;
	struct zbud_page *zbpg = NULL, *ztmp;
	unsigned nchunks;
	char *to;
	int i, found_good_buddy = 0;

	nchunks = zbud_size_to_chunks(size);
	for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
		spin_lock(&zbud_budlists_spinlock);
		if (!list_empty(&zbud_unbuddied[i].list)) {
			list_for_each_entry_safe(zbpg, ztmp,
				    &zbud_unbuddied[i].list, bud_list) {
				if (spin_trylock(&zbpg->lock)) {
					found_good_buddy = i;
					goto found_unbuddied;
				}
			}
		}
		spin_unlock(&zbud_budlists_spinlock);
	}
	/* didn't find a good buddy, try allocating a new page */
	zbpg = zbud_alloc_raw_page();
	if (unlikely(zbpg == NULL))
		goto out;
	/* ok, have a page; place it on the appropriate unbuddied list */
	spin_lock(&zbpg->lock);
	spin_lock(&zbud_budlists_spinlock);
	list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
	zbud_unbuddied[nchunks].count++;
	zh = &zbpg->buddy[0];
	goto init_zh;

found_unbuddied:
	ASSERT_SPINLOCK(&zbpg->lock);
	zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
	BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0)));
	if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */
		ASSERT_SENTINEL(zh0, ZBH);
		zh = zh1;
	} else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */
		ASSERT_SENTINEL(zh1, ZBH);
		zh = zh0;
	} else
		BUG();
	list_del_init(&zbpg->bud_list);
	zbud_unbuddied[found_good_buddy].count--;
	list_add_tail(&zbpg->bud_list, &zbud_buddied_list);
	zcache_zbud_buddied_count++;

init_zh:
	SET_SENTINEL(zh, ZBH);
	zh->size = size;
	zh->index = index;
	zh->oid = *oid;
	zh->pool_id = pool_id;
	/* can wait to copy the data until the list locks are dropped */
	spin_unlock(&zbud_budlists_spinlock);

	to = zbud_data(zh, size);
	memcpy(to, cdata, size);
	spin_unlock(&zbpg->lock);
	zbud_cumul_chunk_counts[nchunks]++;
	atomic_inc(&zcache_zbud_curr_zpages);
	zcache_zbud_cumul_zpages++;
	zcache_zbud_curr_zbytes += size;
	zcache_zbud_cumul_zbytes += size;
out:
	return zh;
}

static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
{
	struct zbud_page *zbpg;
	unsigned budnum = zbud_budnum(zh);
	size_t out_len = PAGE_SIZE;
	char *to_va, *from_va;
	unsigned size;
	int ret = 0;

	zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
	spin_lock(&zbpg->lock);
	if (list_empty(&zbpg->bud_list)) {
		/* ignore zombie page... see zbud_evict_pages() */
		ret = -EINVAL;
		goto out;
	}
	ASSERT_SENTINEL(zh, ZBH);
	BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
	to_va = kmap_atomic(page, KM_USER0);
	size = zh->size;
	from_va = zbud_data(zh, size);
	ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len);
	BUG_ON(ret != LZO_E_OK);
	BUG_ON(out_len != PAGE_SIZE);
	kunmap_atomic(to_va, KM_USER0);
out:
	spin_unlock(&zbpg->lock);
	return ret;
}

/*
 * The following routines handle shrinking of ephemeral pages by evicting
 * pages "least valuable" first.
 */

static unsigned long zcache_evicted_raw_pages;
static unsigned long zcache_evicted_buddied_pages;
static unsigned long zcache_evicted_unbuddied_pages;

static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid);
static void zcache_put_pool(struct tmem_pool *pool);

/*
 * Flush and free all zbuds in a zbpg, then free the pageframe
 */
static void zbud_evict_zbpg(struct zbud_page *zbpg)
{
	struct zbud_hdr *zh;
	int i, j;
	uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS];
	struct tmem_oid oid[ZBUD_MAX_BUDS];
	struct tmem_pool *pool;

	ASSERT_SPINLOCK(&zbpg->lock);
	BUG_ON(!list_empty(&zbpg->bud_list));
	for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
		zh = &zbpg->buddy[i];
		if (zh->size) {
			pool_id[j] = zh->pool_id;
			oid[j] = zh->oid;
			index[j] = zh->index;
			j++;
			zbud_free(zh);
		}
	}
	spin_unlock(&zbpg->lock);
	for (i = 0; i < j; i++) {
		pool = zcache_get_pool_by_id(pool_id[i]);
		if (pool != NULL) {
			tmem_flush_page(pool, &oid[i], index[i]);
			zcache_put_pool(pool);
		}
	}
	ASSERT_SENTINEL(zbpg, ZBPG);
	spin_lock(&zbpg->lock);
	zbud_free_raw_page(zbpg);
}

/*
 * Free nr pages.  This code is funky because we want to hold the locks
 * protecting various lists for as short a time as possible, and in some
 * circumstances the list may change asynchronously when the list lock is
 * not held.  In some cases we also trylock not only to avoid waiting on a
 * page in use by another cpu, but also to avoid potential deadlock due to
 * lock inversion.
 */
static void zbud_evict_pages(int nr)
{
	struct zbud_page *zbpg;
	int i;

	/* first try freeing any pages on unused list */
retry_unused_list:
	spin_lock_bh(&zbpg_unused_list_spinlock);
	if (!list_empty(&zbpg_unused_list)) {
		/* can't walk list here, since it may change when unlocked */
		zbpg = list_first_entry(&zbpg_unused_list,
				struct zbud_page, bud_list);
		list_del_init(&zbpg->bud_list);
		zcache_zbpg_unused_list_count--;
		atomic_dec(&zcache_zbud_curr_raw_pages);
		spin_unlock_bh(&zbpg_unused_list_spinlock);
		zcache_free_page(zbpg);
		zcache_evicted_raw_pages++;
		if (--nr <= 0)
			goto out;
		goto retry_unused_list;
	}
	spin_unlock_bh(&zbpg_unused_list_spinlock);

	/* now try freeing unbuddied pages, starting with least space avail */
	for (i = 0; i < MAX_CHUNK; i++) {
retry_unbud_list_i:
		spin_lock_bh(&zbud_budlists_spinlock);
		if (list_empty(&zbud_unbuddied[i].list)) {
			spin_unlock_bh(&zbud_budlists_spinlock);
			continue;
		}
		list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
			if (unlikely(!spin_trylock(&zbpg->lock)))
				continue;
			list_del_init(&zbpg->bud_list);
			zbud_unbuddied[i].count--;
			spin_unlock(&zbud_budlists_spinlock);
			zcache_evicted_unbuddied_pages++;
			/* want budlists unlocked when doing zbpg eviction */
			zbud_evict_zbpg(zbpg);
			local_bh_enable();
			if (--nr <= 0)
				goto out;
			goto retry_unbud_list_i;
		}
		spin_unlock_bh(&zbud_budlists_spinlock);
	}

	/* as a last resort, free buddied pages */
retry_bud_list:
	spin_lock_bh(&zbud_budlists_spinlock);
	if (list_empty(&zbud_buddied_list)) {
		spin_unlock_bh(&zbud_budlists_spinlock);
		goto out;
	}
	list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
		if (unlikely(!spin_trylock(&zbpg->lock)))
			continue;
		list_del_init(&zbpg->bud_list);
		zcache_zbud_buddied_count--;
		spin_unlock(&zbud_budlists_spinlock);
		zcache_evicted_buddied_pages++;
		/* want budlists unlocked when doing zbpg eviction */
		zbud_evict_zbpg(zbpg);
		local_bh_enable();
		if (--nr <= 0)
			goto out;
		goto retry_bud_list;
	}
	spin_unlock_bh(&zbud_budlists_spinlock);
out:
	return;
}

static void zbud_init(void)
{
	int i;

	INIT_LIST_HEAD(&zbud_buddied_list);
	zcache_zbud_buddied_count = 0;
	for (i = 0; i < NCHUNKS; i++) {
		INIT_LIST_HEAD(&zbud_unbuddied[i].list);
		zbud_unbuddied[i].count = 0;
	}
}

#ifdef CONFIG_SYSFS
/*
 * These sysfs routines show a nice distribution of how many zbpg's are
 * currently (and have ever been placed) in each unbuddied list.  It's fun
 * to watch but can probably go away before final merge.
 */
static int zbud_show_unbuddied_list_counts(char *buf)
{
	int i;
	char *p = buf;

	for (i = 0; i < NCHUNKS - 1; i++)
		p += sprintf(p, "%u ", zbud_unbuddied[i].count);
	p += sprintf(p, "%u\n", zbud_unbuddied[i].count);
	return p - buf;
}

static int zbud_show_cumul_chunk_counts(char *buf)
{
	unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0;
	unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0;
	unsigned long total_chunks_lte_42 = 0;
	char *p = buf;

	for (i = 0; i < NCHUNKS; i++) {
		p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]);
		chunks += zbud_cumul_chunk_counts[i];
		total_chunks += zbud_cumul_chunk_counts[i];
		sum_total_chunks += i * zbud_cumul_chunk_counts[i];
		if (i == 21)
			total_chunks_lte_21 = total_chunks;
		if (i == 32)
			total_chunks_lte_32 = total_chunks;
		if (i == 42)
			total_chunks_lte_42 = total_chunks;
	}
	p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n",
		total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42,
		chunks == 0 ? 0 : sum_total_chunks / chunks);
	return p - buf;
}
#endif

/**********
 * This "zv" PAM implementation combines the TLSF-based xvMalloc
 * with lzo1x compression to maximize the amount of data that can
 * be packed into a physical page.
 *
 * Zv represents a PAM page with the pool id, object id, and index
 * immediately preceding the compressed data; the size needed for
 * decompression is recovered from xvmalloc's object size.
 */
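
/*
 * A sketch of a zv allocation inside an xvmalloc pool (the exact header
 * size depends on sentinel config):
 *
 *	xv object:  [ struct zv_hdr | compressed page data (clen bytes) ]
 *
 * The compressed length is not stored explicitly; zv_free() and
 * zv_decompress() below recover it as
 * xv_get_object_size(zv) - sizeof(struct zv_hdr).
 */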

#define ZVH_SENTINEL  0x43214321

struct zv_hdr {
	uint32_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	DECL_SENTINEL
};

static const int zv_max_page_size = (PAGE_SIZE / 8) * 7;
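
/*
 * Pages whose compressed size exceeds 7/8 of PAGE_SIZE (e.g. 3584 bytes
 * on a 4KB-page system) are judged not worth storing as zv objects;
 * zcache_pampd_create() below counts such rejects in zcache_compress_poor.
 */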

static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
				struct tmem_oid *oid, uint32_t index,
				void *cdata, unsigned clen)
{
	struct page *page;
	struct zv_hdr *zv = NULL;
	uint32_t offset;
	int ret;

	BUG_ON(!irqs_disabled());
	ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr),
			&page, &offset, ZCACHE_GFP_MASK);
	if (unlikely(ret))
		goto out;
	zv = kmap_atomic(page, KM_USER0) + offset;
	zv->index = index;
	zv->oid = *oid;
	zv->pool_id = pool_id;
	SET_SENTINEL(zv, ZVH);
	memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
	kunmap_atomic(zv, KM_USER0);
out:
	return zv;
}

static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
{
	unsigned long flags;
	struct page *page;
	uint32_t offset;
	uint16_t size;

	ASSERT_SENTINEL(zv, ZVH);
	size = xv_get_object_size(zv) - sizeof(*zv);
	BUG_ON(size == 0 || size > zv_max_page_size);
	INVERT_SENTINEL(zv, ZVH);
	page = virt_to_page(zv);
	offset = (unsigned long)zv & ~PAGE_MASK;
	local_irq_save(flags);
	xv_free(xvpool, page, offset);
	local_irq_restore(flags);
}

static void zv_decompress(struct page *page, struct zv_hdr *zv)
{
	size_t clen = PAGE_SIZE;
	char *to_va;
	unsigned size;
	int ret;

	ASSERT_SENTINEL(zv, ZVH);
	size = xv_get_object_size(zv) - sizeof(*zv);
	BUG_ON(size == 0 || size > zv_max_page_size);
	to_va = kmap_atomic(page, KM_USER0);
	ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv),
					size, to_va, &clen);
	kunmap_atomic(to_va, KM_USER0);
	BUG_ON(ret != LZO_E_OK);
	BUG_ON(clen != PAGE_SIZE);
}

/*
 * zcache core code starts here
 */

/* useful stats not collected by cleancache or frontswap */
static unsigned long zcache_flush_total;
static unsigned long zcache_flush_found;
static unsigned long zcache_flobj_total;
static unsigned long zcache_flobj_found;
static unsigned long zcache_failed_eph_puts;
static unsigned long zcache_failed_pers_puts;

#define MAX_POOLS_PER_CLIENT 16

static struct {
	struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
	struct xv_pool *xvpool;
} zcache_client;

/*
 * Tmem operations assume the poolid implies the invoking client.
 * Zcache only has one client (the kernel itself), so translate
 * the poolid into the tmem_pool allocated for it.  A KVM version
 * of zcache would have one client per guest and each client might
 * have a poolid==N.
 */
static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid)
{
	struct tmem_pool *pool = NULL;

	if (poolid < MAX_POOLS_PER_CLIENT) {
		pool = zcache_client.tmem_pools[poolid];
		if (pool != NULL)
			atomic_inc(&pool->refcount);
	}
	return pool;
}

static void zcache_put_pool(struct tmem_pool *pool)
{
	if (pool != NULL)
		atomic_dec(&pool->refcount);
}

/* counters for debugging */
static unsigned long zcache_failed_get_free_pages;
static unsigned long zcache_failed_alloc;
static unsigned long zcache_put_to_flush;
static unsigned long zcache_aborted_preload;
static unsigned long zcache_aborted_shrink;

/*
 * Ensure that memory allocation requests in zcache don't result
 * in direct reclaim requests via the shrinker, which would cause
 * an infinite loop.  Maybe a GFP flag would be better?
 */
static DEFINE_SPINLOCK(zcache_direct_reclaim_lock);

/*
 * for now, use named slabs so we can easily track usage; later we can
 * either just use kmalloc, or perhaps add a slab-like allocator
 * to more carefully manage total memory utilization
 */
static struct kmem_cache *zcache_objnode_cache;
static struct kmem_cache *zcache_obj_cache;
static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_obj_count_max;
static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_objnode_count_max;

/*
 * to avoid memory allocation recursion (e.g. due to direct reclaim), we
 * preload all necessary data structures so the hostops callbacks never
 * actually do a malloc
 */
struct zcache_preload {
	void *page;
	struct tmem_obj *obj;
	int nr;
	struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
};
static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };

static int zcache_do_preload(struct tmem_pool *pool)
{
	struct zcache_preload *kp;
	struct tmem_objnode *objnode;
	struct tmem_obj *obj;
	void *page;
	int ret = -ENOMEM;

	if (unlikely(zcache_objnode_cache == NULL))
		goto out;
	if (unlikely(zcache_obj_cache == NULL))
		goto out;
	if (!spin_trylock(&zcache_direct_reclaim_lock)) {
		zcache_aborted_preload++;
		goto out;
	}
	preempt_disable();
	kp = &__get_cpu_var(zcache_preloads);
	while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
		preempt_enable_no_resched();
		objnode = kmem_cache_alloc(zcache_objnode_cache,
				ZCACHE_GFP_MASK);
		if (unlikely(objnode == NULL)) {
			zcache_failed_alloc++;
			goto unlock_out;
		}
		preempt_disable();
		kp = &__get_cpu_var(zcache_preloads);
		if (kp->nr < ARRAY_SIZE(kp->objnodes))
			kp->objnodes[kp->nr++] = objnode;
		else
			kmem_cache_free(zcache_objnode_cache, objnode);
	}
	preempt_enable_no_resched();
	obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
	if (unlikely(obj == NULL)) {
		zcache_failed_alloc++;
		goto unlock_out;
	}
	page = (void *)__get_free_page(ZCACHE_GFP_MASK);
	if (unlikely(page == NULL)) {
		zcache_failed_get_free_pages++;
		kmem_cache_free(zcache_obj_cache, obj);
		goto unlock_out;
	}
	preempt_disable();
	kp = &__get_cpu_var(zcache_preloads);
	if (kp->obj == NULL)
		kp->obj = obj;
	else
		kmem_cache_free(zcache_obj_cache, obj);
	if (kp->page == NULL)
		kp->page = page;
	else
		free_page((unsigned long)page);
	ret = 0;
unlock_out:
	spin_unlock(&zcache_direct_reclaim_lock);
out:
	return ret;
}
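
/*
 * Typical calling pattern, as used by zcache_put_page() below: a
 * successful zcache_do_preload() returns with preemption disabled, the
 * subsequent tmem_put() draws its allocations from the per-cpu stash
 * via the hostops callbacks, and the caller re-enables preemption
 * afterwards.
 */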

static void *zcache_get_free_page(void)
{
	struct zcache_preload *kp;
	void *page;

	kp = &__get_cpu_var(zcache_preloads);
	page = kp->page;
	BUG_ON(page == NULL);
	kp->page = NULL;
	return page;
}

static void zcache_free_page(void *p)
{
	free_page((unsigned long)p);
}

/*
 * zcache implementation for tmem host ops
 */

static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
{
	struct tmem_objnode *objnode = NULL;
	unsigned long count;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	if (kp->nr <= 0)
		goto out;
	objnode = kp->objnodes[kp->nr - 1];
	BUG_ON(objnode == NULL);
	kp->objnodes[kp->nr - 1] = NULL;
	kp->nr--;
	count = atomic_inc_return(&zcache_curr_objnode_count);
	if (count > zcache_curr_objnode_count_max)
		zcache_curr_objnode_count_max = count;
out:
	return objnode;
}

static void zcache_objnode_free(struct tmem_objnode *objnode,
				struct tmem_pool *pool)
{
	atomic_dec(&zcache_curr_objnode_count);
	BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0);
	kmem_cache_free(zcache_objnode_cache, objnode);
}

static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
{
	struct tmem_obj *obj = NULL;
	unsigned long count;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	obj = kp->obj;
	BUG_ON(obj == NULL);
	kp->obj = NULL;
	count = atomic_inc_return(&zcache_curr_obj_count);
	if (count > zcache_curr_obj_count_max)
		zcache_curr_obj_count_max = count;
	return obj;
}

static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	atomic_dec(&zcache_curr_obj_count);
	BUG_ON(atomic_read(&zcache_curr_obj_count) < 0);
	kmem_cache_free(zcache_obj_cache, obj);
}

static struct tmem_hostops zcache_hostops = {
	.obj_alloc = zcache_obj_alloc,
	.obj_free = zcache_obj_free,
	.objnode_alloc = zcache_objnode_alloc,
	.objnode_free = zcache_objnode_free,
};

/*
 * zcache implementations for PAM page descriptor ops
 */

static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_eph_pampd_count_max;
static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_pers_pampd_count_max;

/* forward reference */
static int zcache_compress(struct page *from, void **out_va, size_t *out_len);

static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid,
				 uint32_t index, struct page *page)
{
	void *pampd = NULL, *cdata;
	size_t clen;
	int ret;
	bool ephemeral = is_ephemeral(pool);
	unsigned long count;

	if (ephemeral) {
		ret = zcache_compress(page, &cdata, &clen);
		if (ret == 0)
			goto out;
		if (clen == 0 || clen > zbud_max_buddy_size()) {
			zcache_compress_poor++;
			goto out;
		}
		pampd = (void *)zbud_create(pool->pool_id, oid, index,
						page, cdata, clen);
		if (pampd != NULL) {
			count = atomic_inc_return(&zcache_curr_eph_pampd_count);
			if (count > zcache_curr_eph_pampd_count_max)
				zcache_curr_eph_pampd_count_max = count;
		}
	} else {
		/*
		 * FIXME: This is all the "policy" there is for now.
		 * 3/4 totpages should allow ~37% of RAM to be filled with
		 * compressed frontswap pages
		 */
		if (atomic_read(&zcache_curr_pers_pampd_count) >
				3 * totalram_pages / 4)
			goto out;
		ret = zcache_compress(page, &cdata, &clen);
		if (ret == 0)
			goto out;
		if (clen > zv_max_page_size) {
			zcache_compress_poor++;
			goto out;
		}
		pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id,
						oid, index, cdata, clen);
		if (pampd == NULL)
			goto out;
		count = atomic_inc_return(&zcache_curr_pers_pampd_count);
		if (count > zcache_curr_pers_pampd_count_max)
			zcache_curr_pers_pampd_count_max = count;
	}
out:
	return pampd;
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd
 */
static int zcache_pampd_get_data(struct page *page, void *pampd,
				 struct tmem_pool *pool)
{
	int ret = 0;

	if (is_ephemeral(pool))
		ret = zbud_decompress(page, pampd);
	else
		zv_decompress(page, pampd);
	return ret;
}

/*
 * free the pampd and remove it from any zcache lists
 * pampd must no longer be pointed to from any tmem data structures!
 */
static void zcache_pampd_free(void *pampd, struct tmem_pool *pool)
{
	if (is_ephemeral(pool)) {
		zbud_free_and_delist((struct zbud_hdr *)pampd);
		atomic_dec(&zcache_curr_eph_pampd_count);
		BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
	} else {
		zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd);
		atomic_dec(&zcache_curr_pers_pampd_count);
		BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
	}
}

static struct tmem_pamops zcache_pamops = {
	.create = zcache_pampd_create,
	.get_data = zcache_pampd_get_data,
	.free = zcache_pampd_free,
};

/*
 * zcache compression/decompression and related per-cpu stuff
 */

#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
#define LZO_DSTMEM_PAGE_ORDER 1
static DEFINE_PER_CPU(unsigned char *, zcache_workmem);
static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);

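/*
 * Compress the page into the per-cpu dstmem buffer.  Note the return
 * convention: 1 means success (with *out_va/*out_len filled in), 0 means
 * the per-cpu buffers were unavailable, which is why the callers above
 * treat ret == 0 as "don't put this page".
 */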
static int zcache_compress(struct page *from, void **out_va, size_t *out_len)
{
	int ret = 0;
	unsigned char *dmem = __get_cpu_var(zcache_dstmem);
	unsigned char *wmem = __get_cpu_var(zcache_workmem);
	char *from_va;

	BUG_ON(!irqs_disabled());
	if (unlikely(dmem == NULL || wmem == NULL))
		goto out; /* no buffer, so can't compress */
	from_va = kmap_atomic(from, KM_USER0);
	mb();
	ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem);
	BUG_ON(ret != LZO_E_OK);
	*out_va = dmem;
	kunmap_atomic(from_va, KM_USER0);
	ret = 1;
out:
	return ret;
}

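/*
 * Manage the per-cpu compression buffers across cpu hotplug events.
 * One caveat worth noting: a failed allocation in CPU_UP_PREPARE is not
 * reported here; zcache_compress() above simply refuses to compress
 * while the per-cpu buffers are NULL.
 */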
static int zcache_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	int cpu = (long)pcpu;
	struct zcache_preload *kp;

	switch (action) {
	case CPU_UP_PREPARE:
		per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
			GFP_KERNEL | __GFP_REPEAT,
			LZO_DSTMEM_PAGE_ORDER);
		per_cpu(zcache_workmem, cpu) =
			kzalloc(LZO_WORKMEM_BYTES,
				GFP_KERNEL | __GFP_REPEAT);
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
			LZO_DSTMEM_PAGE_ORDER);
		per_cpu(zcache_dstmem, cpu) = NULL;
		kfree(per_cpu(zcache_workmem, cpu));
		per_cpu(zcache_workmem, cpu) = NULL;
		kp = &per_cpu(zcache_preloads, cpu);
		while (kp->nr) {
			kmem_cache_free(zcache_objnode_cache,
					kp->objnodes[kp->nr - 1]);
			kp->objnodes[kp->nr - 1] = NULL;
			kp->nr--;
		}
		/* the preload obj/page may already have been consumed */
		if (kp->obj) {
			kmem_cache_free(zcache_obj_cache, kp->obj);
			kp->obj = NULL;
		}
		if (kp->page) {
			free_page((unsigned long)kp->page);
			kp->page = NULL;
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block zcache_cpu_notifier_block = {
	.notifier_call = zcache_cpu_notifier
};

#ifdef CONFIG_SYSFS
#define ZCACHE_SYSFS_RO(_name) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", zcache_##_name); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}

#define ZCACHE_SYSFS_RO_ATOMIC(_name) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}

#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return _func(buf); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}

ZCACHE_SYSFS_RO(curr_obj_count_max);
ZCACHE_SYSFS_RO(curr_objnode_count_max);
ZCACHE_SYSFS_RO(flush_total);
ZCACHE_SYSFS_RO(flush_found);
ZCACHE_SYSFS_RO(flobj_total);
ZCACHE_SYSFS_RO(flobj_found);
ZCACHE_SYSFS_RO(failed_eph_puts);
ZCACHE_SYSFS_RO(failed_pers_puts);
ZCACHE_SYSFS_RO(zbud_curr_zbytes);
ZCACHE_SYSFS_RO(zbud_cumul_zpages);
ZCACHE_SYSFS_RO(zbud_cumul_zbytes);
ZCACHE_SYSFS_RO(zbud_buddied_count);
ZCACHE_SYSFS_RO(zbpg_unused_list_count);
ZCACHE_SYSFS_RO(evicted_raw_pages);
ZCACHE_SYSFS_RO(evicted_unbuddied_pages);
ZCACHE_SYSFS_RO(evicted_buddied_pages);
ZCACHE_SYSFS_RO(failed_get_free_pages);
ZCACHE_SYSFS_RO(failed_alloc);
ZCACHE_SYSFS_RO(put_to_flush);
ZCACHE_SYSFS_RO(aborted_preload);
ZCACHE_SYSFS_RO(aborted_shrink);
ZCACHE_SYSFS_RO(compress_poor);
ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count);
ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
			zbud_show_unbuddied_list_counts);
ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
			zbud_show_cumul_chunk_counts);

static struct attribute *zcache_attrs[] = {
	&zcache_curr_obj_count_attr.attr,
	&zcache_curr_obj_count_max_attr.attr,
	&zcache_curr_objnode_count_attr.attr,
	&zcache_curr_objnode_count_max_attr.attr,
	&zcache_flush_total_attr.attr,
	&zcache_flobj_total_attr.attr,
	&zcache_flush_found_attr.attr,
	&zcache_flobj_found_attr.attr,
	&zcache_failed_eph_puts_attr.attr,
	&zcache_failed_pers_puts_attr.attr,
	&zcache_compress_poor_attr.attr,
	&zcache_zbud_curr_raw_pages_attr.attr,
	&zcache_zbud_curr_zpages_attr.attr,
	&zcache_zbud_curr_zbytes_attr.attr,
	&zcache_zbud_cumul_zpages_attr.attr,
	&zcache_zbud_cumul_zbytes_attr.attr,
	&zcache_zbud_buddied_count_attr.attr,
	&zcache_zbpg_unused_list_count_attr.attr,
	&zcache_evicted_raw_pages_attr.attr,
	&zcache_evicted_unbuddied_pages_attr.attr,
	&zcache_evicted_buddied_pages_attr.attr,
	&zcache_failed_get_free_pages_attr.attr,
	&zcache_failed_alloc_attr.attr,
	&zcache_put_to_flush_attr.attr,
	&zcache_aborted_preload_attr.attr,
	&zcache_aborted_shrink_attr.attr,
	&zcache_zbud_unbuddied_list_counts_attr.attr,
	&zcache_zbud_cumul_chunk_counts_attr.attr,
	NULL,
};

static struct attribute_group zcache_attr_group = {
	.attrs = zcache_attrs,
	.name = "zcache",
};

#endif /* CONFIG_SYSFS */

/*
 * When zcache is disabled ("frozen"), pools can be created and destroyed,
 * but all puts (and thus all other operations that require memory allocation)
 * must fail.  If zcache is unfrozen, accepts puts, then frozen again,
 * data consistency requires all puts while frozen to be converted into
 * flushes.
 */
static bool zcache_freeze;

/*
 * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
 */
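
/*
 * A note on the contract assumed here (the shrink_control-based API of
 * this era): the VM calls ->shrink() with sc->nr_to_scan == 0 purely to
 * query how much is reclaimable, and with a positive count to request
 * eviction; either way the return value is the number of reclaimable
 * raw pages remaining.
 */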
static int shrink_zcache_memory(struct shrinker *shrink,
				struct shrink_control *sc)
{
	int ret = -1;
	int nr = sc->nr_to_scan;
	gfp_t gfp_mask = sc->gfp_mask;

	if (nr >= 0) {
		if (!(gfp_mask & __GFP_FS))
			/* does this case really need to be skipped? */
			goto out;
		if (spin_trylock(&zcache_direct_reclaim_lock)) {
			zbud_evict_pages(nr);
			spin_unlock(&zcache_direct_reclaim_lock);
		} else
			zcache_aborted_shrink++;
	}
	ret = (int)atomic_read(&zcache_zbud_curr_raw_pages);
out:
	return ret;
}

static struct shrinker zcache_shrinker = {
	.shrink = shrink_zcache_memory,
	.seeks = DEFAULT_SEEKS,
};

/*
 * zcache shims between cleancache/frontswap ops and tmem
 */

static int zcache_put_page(int pool_id, struct tmem_oid *oidp,
				uint32_t index, struct page *page)
{
	struct tmem_pool *pool;
	int ret = -1;

	BUG_ON(!irqs_disabled());
	pool = zcache_get_pool_by_id(pool_id);
	if (unlikely(pool == NULL))
		goto out;
	if (!zcache_freeze && zcache_do_preload(pool) == 0) {
		/* preload does preempt_disable on success */
		ret = tmem_put(pool, oidp, index, page);
		if (ret < 0) {
			if (is_ephemeral(pool))
				zcache_failed_eph_puts++;
			else
				zcache_failed_pers_puts++;
		}
		zcache_put_pool(pool);
		preempt_enable_no_resched();
	} else {
		zcache_put_to_flush++;
		if (atomic_read(&pool->obj_count) > 0)
			/* the put fails whether the flush succeeds or not */
			(void)tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
out:
	return ret;
}

static int zcache_get_page(int pool_id, struct tmem_oid *oidp,
				uint32_t index, struct page *page)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	pool = zcache_get_pool_by_id(pool_id);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_get(pool, oidp, index, page);
		zcache_put_pool(pool);
	}
	local_irq_restore(flags);
	return ret;
}

static int zcache_flush_page(int pool_id, struct tmem_oid *oidp,
				uint32_t index)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flush_total++;
	pool = zcache_get_pool_by_id(pool_id);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flush_found++;
	local_irq_restore(flags);
	return ret;
}

static int zcache_flush_object(int pool_id, struct tmem_oid *oidp)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flobj_total++;
	pool = zcache_get_pool_by_id(pool_id);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_object(pool, oidp);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flobj_found++;
	local_irq_restore(flags);
	return ret;
}

static int zcache_destroy_pool(int pool_id)
{
	struct tmem_pool *pool = NULL;
	int ret = -1;

	if (pool_id < 0 || pool_id >= MAX_POOLS_PER_CLIENT)
		goto out;
	pool = zcache_client.tmem_pools[pool_id];
	if (pool == NULL)
		goto out;
	zcache_client.tmem_pools[pool_id] = NULL;
	/* wait for pool activity on other cpus to quiesce */
	while (atomic_read(&pool->refcount) != 0)
		;
	local_bh_disable();
	ret = tmem_destroy_pool(pool);
	local_bh_enable();
	kfree(pool);
	pr_info("zcache: destroyed pool id=%d\n", pool_id);
out:
	return ret;
}

static int zcache_new_pool(uint32_t flags)
{
	int poolid = -1;
	struct tmem_pool *pool;

	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
	if (pool == NULL) {
		pr_info("zcache: pool creation failed: out of memory\n");
		goto out;
	}

	for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
		if (zcache_client.tmem_pools[poolid] == NULL)
			break;
	if (poolid >= MAX_POOLS_PER_CLIENT) {
		pr_info("zcache: pool creation failed: max exceeded\n");
		kfree(pool);
		poolid = -1;
		goto out;
	}
	atomic_set(&pool->refcount, 0);
	pool->client = &zcache_client;
	pool->pool_id = poolid;
	tmem_new_pool(pool, flags);
	zcache_client.tmem_pools[poolid] = pool;
	pr_info("zcache: created %s tmem pool, id=%d\n",
		flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
		poolid);
out:
	return poolid;
}

/**********
 * Two kernel functionalities currently can be layered on top of tmem.
 * These are "cleancache" which is used as a second-chance cache for clean
 * page cache pages; and "frontswap" which is used for swap pages
 * to avoid writes to disk.  A generic "shim" is provided here for each
 * to translate in-kernel semantics to zcache semantics.
 */
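
/*
 * In this arrangement, each cleancache-enabled filesystem gets its own
 * ephemeral tmem pool (created by the init_fs hooks below), while all
 * frontswap "types" share a single persistent pool; see
 * zcache_frontswap_init() further down.
 */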

#ifdef CONFIG_CLEANCACHE
static void zcache_cleancache_put_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_put_page(pool_id, &oid, ind, page);
}

static int zcache_cleancache_get_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	int ret = -1;

	if (likely(ind == index))
		ret = zcache_get_page(pool_id, &oid, ind, page);
	return ret;
}

static void zcache_cleancache_flush_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_flush_page(pool_id, &oid, ind);
}

static void zcache_cleancache_flush_inode(int pool_id,
					struct cleancache_filekey key)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	(void)zcache_flush_object(pool_id, &oid);
}

static void zcache_cleancache_flush_fs(int pool_id)
{
	if (pool_id >= 0)
		(void)zcache_destroy_pool(pool_id);
}

static int zcache_cleancache_init_fs(size_t pagesize)
{
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_new_pool(0);
}

static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	/* shared pools are unsupported and map to private */
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_new_pool(0);
}

static struct cleancache_ops zcache_cleancache_ops = {
	.put_page = zcache_cleancache_put_page,
	.get_page = zcache_cleancache_get_page,
	.flush_page = zcache_cleancache_flush_page,
	.flush_inode = zcache_cleancache_flush_inode,
	.flush_fs = zcache_cleancache_flush_fs,
	.init_shared_fs = zcache_cleancache_init_shared_fs,
	.init_fs = zcache_cleancache_init_fs
};

struct cleancache_ops zcache_cleancache_register_ops(void)
{
	struct cleancache_ops old_ops =
		cleancache_register_ops(&zcache_cleancache_ops);

	return old_ops;
}
#endif

#ifdef CONFIG_FRONTSWAP
/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
static int zcache_frontswap_poolid = -1;

/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
 */
#define SWIZ_BITS		4
#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind)	((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind)		(_ind >> SWIZ_BITS)
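
/*
 * A worked example of the swizzle, as a sketch: with SWIZ_BITS == 4,
 * swap type 1 and page offset 0x2345 map to oid 0x15 (the type
 * concatenated with the low 4 offset bits, 0x5) and index 0x234 (the
 * remaining offset bits), so consecutive swap offsets are spread across
 * 16 tmem objects.
 */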

static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };
	oid.oid[0] = _oswiz(type, ind);
	return oid;
}

static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind)) {
		local_irq_save(flags);
		ret = zcache_put_page(zcache_frontswap_poolid, &oid,
					iswiz(ind), page);
		local_irq_restore(flags);
	}
	return ret;
}

/*
 * returns 0 if the page was successfully gotten from frontswap, -1 if
 * it was not present (should never happen!)
 */
static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;

	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind))
		ret = zcache_get_page(zcache_frontswap_poolid, &oid,
					iswiz(ind), page);
	return ret;
}

/* flush a single page from frontswap */
static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);

	if (likely(ind64 == ind))
		(void)zcache_flush_page(zcache_frontswap_poolid, &oid,
					iswiz(ind));
}

/* flush all pages from the passed swaptype */
static void zcache_frontswap_flush_area(unsigned type)
{
	struct tmem_oid oid;
	int ind;

	for (ind = SWIZ_MASK; ind >= 0; ind--) {
		oid = oswiz(type, ind);
		(void)zcache_flush_object(zcache_frontswap_poolid, &oid);
	}
}

static void zcache_frontswap_init(unsigned ignored)
{
	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (zcache_frontswap_poolid < 0)
		zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST);
}

static struct frontswap_ops zcache_frontswap_ops = {
	.put_page = zcache_frontswap_put_page,
	.get_page = zcache_frontswap_get_page,
	.flush_page = zcache_frontswap_flush_page,
	.flush_area = zcache_frontswap_flush_area,
	.init = zcache_frontswap_init
};

struct frontswap_ops zcache_frontswap_register_ops(void)
{
	struct frontswap_ops old_ops =
		frontswap_register_ops(&zcache_frontswap_ops);

	return old_ops;
}
#endif

/*
 * zcache initialization
 * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
 * NOTHING HAPPENS!
 */

static int zcache_enabled;

static int __init enable_zcache(char *s)
{
	zcache_enabled = 1;
	return 1;
}
__setup("zcache", enable_zcache);

/* allow independent dynamic disabling of cleancache and frontswap */

static int use_cleancache = 1;

static int __init no_cleancache(char *s)
{
	use_cleancache = 0;
	return 1;
}
__setup("nocleancache", no_cleancache);

static int use_frontswap = 1;

static int __init no_frontswap(char *s)
{
	use_frontswap = 0;
	return 1;
}
__setup("nofrontswap", no_frontswap);

static int __init zcache_init(void)
{
	int ret = 0;

#ifdef CONFIG_SYSFS
	ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
	if (ret) {
		pr_err("zcache: can't create sysfs\n");
		goto out;
	}
#endif /* CONFIG_SYSFS */
#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
	if (zcache_enabled) {
		unsigned int cpu;

		tmem_register_hostops(&zcache_hostops);
		tmem_register_pamops(&zcache_pamops);
		ret = register_cpu_notifier(&zcache_cpu_notifier_block);
		if (ret) {
			pr_err("zcache: can't register cpu notifier\n");
			goto out;
		}
		for_each_online_cpu(cpu) {
			void *pcpu = (void *)(long)cpu;
			zcache_cpu_notifier(&zcache_cpu_notifier_block,
				CPU_UP_PREPARE, pcpu);
		}
	}
	zcache_objnode_cache = kmem_cache_create("zcache_objnode",
				sizeof(struct tmem_objnode), 0, 0, NULL);
	zcache_obj_cache = kmem_cache_create("zcache_obj",
				sizeof(struct tmem_obj), 0, 0, NULL);
#endif
#ifdef CONFIG_CLEANCACHE
	if (zcache_enabled && use_cleancache) {
		struct cleancache_ops old_ops;

		zbud_init();
		register_shrinker(&zcache_shrinker);
		old_ops = zcache_cleancache_register_ops();
		pr_info("zcache: cleancache enabled using kernel "
			"transcendent memory and compression buddies\n");
		if (old_ops.init_fs != NULL)
			pr_warning("zcache: cleancache_ops overridden\n");
	}
#endif
#ifdef CONFIG_FRONTSWAP
	if (zcache_enabled && use_frontswap) {
		struct frontswap_ops old_ops;

		zcache_client.xvpool = xv_create_pool();
		if (zcache_client.xvpool == NULL) {
			pr_err("zcache: can't create xvpool\n");
			goto out;
		}
		old_ops = zcache_frontswap_register_ops();
		pr_info("zcache: frontswap enabled using kernel "
			"transcendent memory and xvmalloc\n");
		if (old_ops.init != NULL)
			pr_warning("zcache: frontswap_ops overridden\n");
	}
#endif
out:
	return ret;
}

module_init(zcache_init)