/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include <linux/config.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/device-mapper.h>
#include <linux/vmalloc.h>

#include "dm-snapshot.h"
#include "kcopyd.h"

/*
 * FIXME: Remove this before release.
 */
#if 0
#define DMDEBUG(x...) DMWARN(x)
#else
#define DMDEBUG(x...)
#endif

/*
 * The percentage increment we will wake up users at
 */
#define WAKE_UP_PERCENT 5

/*
 * kcopyd priority of snapshot operations
 */
#define SNAPSHOT_COPY_PRIORITY 2

struct pending_exception {
	struct exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a list (using b_reqnext).
	 */
	struct buffer_head *origin_bhs;
	struct buffer_head *snapshot_bhs;

	/*
	 * Other pending_exceptions that are processing this
	 * chunk.  When this list is empty, we know we can
	 * complete the origins.
	 */
	struct list_head siblings;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;
};

/*
 * Slab caches for the exception structures, plus a mempool that
 * guarantees forward progress when allocating pending exceptions.
 */
static kmem_cache_t *exception_cache;
static kmem_cache_t *pending_cache;
static mempool_t *pending_pool;

/*
 * One of these per registered origin, held in the snapshot_origins hash
 */
struct origin {
	/* The origin device */
	kdev_t dev;

	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};

/*
 * Size of the hash table for origin volumes.  If we make this
 * the size of the minors list then it should be nearly perfect.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
static struct list_head *_origins;
static struct rw_semaphore _origins_lock;

static int init_origin_hash(void)
{
	int i;

	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
			   GFP_KERNEL);
	if (!_origins) {
		DMERR("Device mapper: Snapshot: unable to allocate memory");
		return -ENOMEM;
	}

	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_origins + i);
	init_rwsem(&_origins_lock);

	return 0;
}

static void exit_origin_hash(void)
{
	kfree(_origins);
}

static inline unsigned int origin_hash(kdev_t dev)
{
	return MINOR(dev) & ORIGIN_MASK;
}
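
/*
 * Note: origin_hash() works because ORIGIN_MASK == ORIGIN_HASH_SIZE - 1
 * (0xFF == 256 - 1), so masking the minor number is equivalent to
 * taking it modulo the table size.
 */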

static struct origin *__lookup_origin(kdev_t origin)
{
	struct list_head *slist;
	struct list_head *ol;
	struct origin *o;

	ol = &_origins[origin_hash(origin)];
	list_for_each(slist, ol) {
		o = list_entry(slist, struct origin, hash_list);

		if (o->dev == origin)
			return o;
	}

	return NULL;
}

static void __insert_origin(struct origin *o)
{
	struct list_head *sl = &_origins[origin_hash(o->dev)];
	list_add_tail(&o->hash_list, sl);
}

/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct origin *o;
	kdev_t dev = snap->origin->dev;

	down_write(&_origins_lock);
	o = __lookup_origin(dev);

	if (!o) {
		/* New origin */
		o = kmalloc(sizeof(*o), GFP_KERNEL);
		if (!o) {
			up_write(&_origins_lock);
			return -ENOMEM;
		}

		/* Initialise the struct */
		INIT_LIST_HEAD(&o->snapshots);
		o->dev = dev;

		__insert_origin(o);
	}

	list_add_tail(&snap->list, &o->snapshots);

	up_write(&_origins_lock);
	return 0;
}

static void unregister_snapshot(struct dm_snapshot *s)
{
	struct origin *o;

	down_write(&_origins_lock);
	o = __lookup_origin(s->origin->dev);

	list_del(&s->list);
	if (list_empty(&o->snapshots)) {
		list_del(&o->hash_list);
		kfree(o);
	}

	up_write(&_origins_lock);
}

/*
 * Implementation of the exception hash tables.
 */
static int init_exception_table(struct exception_table *et, uint32_t size)
{
	int i;

	et->hash_mask = size - 1;
	et->table = vcalloc(size, sizeof(struct list_head));
	if (!et->table)
		return -ENOMEM;

	for (i = 0; i < size; i++)
		INIT_LIST_HEAD(et->table + i);

	return 0;
}

static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
{
	struct list_head *slot, *entry, *temp;
	struct exception *ex;
	int i, size;

	size = et->hash_mask + 1;
	for (i = 0; i < size; i++) {
		slot = et->table + i;

		list_for_each_safe(entry, temp, slot) {
			ex = list_entry(entry, struct exception, hash_list);
			kmem_cache_free(mem, ex);
		}
	}

	vfree(et->table);
}

/*
 * FIXME: check how this hash fn is performing.
 */
static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
{
	return chunk & et->hash_mask;
}
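
/*
 * As with origin_hash(), chunk & hash_mask only acts as a modulo
 * because init_exception_table() is always called with a power-of-2
 * size (see round_down() in init_hash_tables() below).
 */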

static void insert_exception(struct exception_table *eh, struct exception *e)
{
	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
	list_add(&e->hash_list, l);
}

static inline void remove_exception(struct exception *e)
{
	list_del(&e->hash_list);
}

/*
 * Return the exception data for a sector, or NULL if not
 * remapped.
 */
static struct exception *lookup_exception(struct exception_table *et,
					  chunk_t chunk)
{
	struct list_head *slot, *el;
	struct exception *e;

	slot = &et->table[exception_hash(et, chunk)];
	list_for_each(el, slot) {
		e = list_entry(el, struct exception, hash_list);
		if (e->old_chunk == chunk)
			return e;
	}

	return NULL;
}

static inline struct exception *alloc_exception(void)
{
	struct exception *e;

	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
	if (!e)
		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);

	return e;
}

static inline void free_exception(struct exception *e)
{
	kmem_cache_free(exception_cache, e);
}

static inline struct pending_exception *alloc_pending_exception(void)
{
	return mempool_alloc(pending_pool, GFP_NOIO);
}

static inline void free_pending_exception(struct pending_exception *pe)
{
	mempool_free(pe, pending_pool);
}

int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
{
	struct exception *e;

	e = alloc_exception();
	if (!e)
		return -ENOMEM;

	e->old_chunk = old;
	e->new_chunk = new;
	insert_exception(&s->complete, e);
	return 0;
}

/*
 * Hard-coded heuristic: allow the exception hash table to use at
 * most 2% of physical memory, divided into list_head buckets.
 */
static int calc_max_buckets(void)
{
	unsigned long mem;

	mem = num_physpages << PAGE_SHIFT;
	mem /= 50;
	mem /= sizeof(struct list_head);

	return mem;
}

/*
 * Rounds a number down to a power of 2.
 */
static inline uint32_t round_down(uint32_t n)
{
	while (n & (n - 1))
		n &= (n - 1);
	return n;
}
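
/*
 * n & (n - 1) clears the lowest set bit, so the loop terminates when
 * exactly one bit remains, i.e. the highest power of 2 <= n.
 * For example: 1101b (13) -> 1100b (12) -> 1000b (8).
 */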

/*
 * Allocate room for a suitable hash table.
 */
static int init_hash_tables(struct dm_snapshot *s)
{
	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;

	/*
	 * Calculate based on the size of the original volume or
	 * the COW volume...
	 */
	cow_dev_size = get_dev_size(s->cow->dev);
	origin_dev_size = get_dev_size(s->origin->dev);
	max_buckets = calc_max_buckets();

	hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
	hash_size = min(hash_size, max_buckets);

	/* Round it down to a power of 2 */
	hash_size = round_down(hash_size);
	if (init_exception_table(&s->complete, hash_size))
		return -ENOMEM;

	/*
	 * Allocate hash table for in-flight exceptions.
	 * Make this smaller than the real hash table.
	 */
	hash_size >>= 3;
	if (!hash_size)
		hash_size = 64;

	if (init_exception_table(&s->pending, hash_size)) {
		exit_exception_table(&s->complete, exception_cache);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Round a number up to the nearest 'size' boundary.  size must
 * be a power of 2.
 */
static inline ulong round_up(ulong n, ulong size)
{
	size--;
	return (n + size) & ~size;
}
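
/*
 * For example, round_up(5, 8) computes (5 + 7) & ~7 = 8, while
 * round_up(8, 8) leaves 8 unchanged; the mask trick relies on
 * 'size' being a power of 2.
 */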

/*
 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
 */
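/*
 * An illustrative table line (device names hypothetical):
 * "0 2097152 snapshot /dev/vg0/lv0 /dev/vg0/cow P 16" maps 1 GiB and
 * creates a persistent snapshot with 16-sector (8 KiB) chunks.
 */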
static int snapshot_ctr(struct dm_target *ti, int argc, char **argv)
{
	struct dm_snapshot *s;
	unsigned long chunk_size;
	int r = -EINVAL;
	char persistent;
	char *origin_path;
	char *cow_path;
	char *value;
	int blocksize;

	if (argc != 4) {
		ti->error = "dm-snapshot: requires exactly 4 arguments";
		r = -EINVAL;
		goto bad;
	}

	origin_path = argv[0];
	cow_path = argv[1];
	persistent = toupper(*argv[2]);

	if (persistent != 'P' && persistent != 'N') {
		ti->error = "Persistent flag is not P or N";
		r = -EINVAL;
		goto bad;
	}

	chunk_size = simple_strtoul(argv[3], &value, 10);
	if (chunk_size == 0 || *value != '\0') {
		ti->error = "Invalid chunk size";
		r = -EINVAL;
		goto bad;
	}

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL) {
		ti->error = "Cannot allocate snapshot context private "
		    "structure";
		r = -ENOMEM;
		goto bad;
	}

	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
	if (r) {
		ti->error = "Cannot get origin device";
		goto bad_free;
	}

	/* FIXME: get cow length */
	r = dm_get_device(ti, cow_path, 0, 0,
			  FMODE_READ | FMODE_WRITE, &s->cow);
	if (r) {
		dm_put_device(ti, s->origin);
		ti->error = "Cannot get COW device";
		goto bad_free;
	}

	/*
	 * Chunk size must be multiple of page size.  Silently
	 * round up if it's not.
	 */
	chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE);

	/* Validate the chunk size against the device block size */
	blocksize = get_hardsect_size(s->cow->dev);
	if (chunk_size % (blocksize / SECTOR_SIZE)) {
		ti->error = "Chunk size is not a multiple of device blocksize";
		r = -EINVAL;
		goto bad_putdev;
	}

	/* Check the sizes are small enough to fit in one kiovec */
	if (chunk_size > KIO_MAX_SECTORS) {
		ti->error = "Chunk size is too big";
		r = -EINVAL;
		goto bad_putdev;
	}

	/* Check chunk_size is a power of 2 */
	if (chunk_size & (chunk_size - 1)) {
		ti->error = "Chunk size is not a power of 2";
		r = -EINVAL;
		goto bad_putdev;
	}

	s->chunk_size = chunk_size;
	s->chunk_mask = chunk_size - 1;
	s->type = persistent;
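	/*
	 * Compute chunk_shift such that 1 << chunk_shift == chunk_size
	 * (chunk_size is known to be a power of 2 here): count bit
	 * positions until the value shifts to zero, then step back one.
	 */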
	for (s->chunk_shift = 0; chunk_size;
	     s->chunk_shift++, chunk_size >>= 1)
		;
	s->chunk_shift--;

	s->valid = 1;
	s->last_percent = 0;
	init_rwsem(&s->lock);
	s->table = ti->table;

	/* Allocate hash table for COW data */
	if (init_hash_tables(s)) {
		ti->error = "Unable to allocate hash table space";
		r = -ENOMEM;
		goto bad_putdev;
	}

	/*
	 * Check the persistent flag - done here because we need the iobuf
	 * to check the LV header
	 */
	s->store.snap = s;

	if (persistent == 'P')
		r = dm_create_persistent(&s->store, s->chunk_size);
	else
		r = dm_create_transient(&s->store, s, blocksize);

	if (r) {
		ti->error = "Couldn't create exception store";
		r = -EINVAL;
		goto bad_free1;
	}

	/* Flush IO to the origin device */
#if LVM_VFS_ENHANCEMENT
	fsync_dev_lockfs(s->origin->dev);
#else
	fsync_dev(s->origin->dev);
#endif

	/* Add snapshot to the list of snapshots for this origin */
	if (register_snapshot(s)) {
		r = -EINVAL;
		ti->error = "Cannot register snapshot origin";
		goto bad_free2;
	}
#if LVM_VFS_ENHANCEMENT
	unlockfs(s->origin->dev);
#endif
	kcopyd_inc_client_count();

	ti->private = s;
	return 0;

 bad_free2:
#if LVM_VFS_ENHANCEMENT
	unlockfs(s->origin->dev);
#endif
	s->store.destroy(&s->store);

 bad_free1:
	exit_exception_table(&s->pending, pending_cache);
	exit_exception_table(&s->complete, exception_cache);

 bad_putdev:
	dm_put_device(ti, s->cow);
	dm_put_device(ti, s->origin);

 bad_free:
	kfree(s);

 bad:
	return r;
}

static void snapshot_dtr(struct dm_target *ti)
{
	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;

	dm_table_event(ti->table);

	unregister_snapshot(s);

	exit_exception_table(&s->pending, pending_cache);
	exit_exception_table(&s->complete, exception_cache);

	/* Deallocate memory used */
	s->store.destroy(&s->store);

	dm_put_device(ti, s->origin);
	dm_put_device(ti, s->cow);
	kfree(s);

	kcopyd_dec_client_count();
}

/*
 * We hold lists of buffer_heads, using the b_reqnext field.
 */
static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh)
{
	bh->b_reqnext = *queue;
	*queue = bh;
}
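
/*
 * This is a LIFO push: the buffer_head becomes the new list head, with
 * b_reqnext pointing at the previous head (or NULL for an empty list).
 */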

/*
 * Flush a list of buffers.
 */
static void flush_buffers(struct buffer_head *bh)
{
	struct buffer_head *n;

	DMDEBUG("begin flush");
	while (bh) {
		n = bh->b_reqnext;
		bh->b_reqnext = NULL;
		DMDEBUG("flushing %p", bh);
		generic_make_request(WRITE, bh);
		bh = n;
	}

	run_task_queue(&tq_disk);
}

/*
 * Error a list of buffers.
 */
static void error_buffers(struct buffer_head *bh)
{
	struct buffer_head *n;

	while (bh) {
		n = bh->b_reqnext;
		bh->b_reqnext = NULL;
		buffer_IO_error(bh);
		bh = n;
	}
}

static void pending_complete(struct pending_exception *pe, int success)
{
	struct exception *e;
	struct dm_snapshot *s = pe->snap;

	if (success) {
		e = alloc_exception();
		if (!e) {
			DMERR("Unable to allocate exception.");
			down_write(&s->lock);
			s->store.drop_snapshot(&s->store);
			s->valid = 0;
			up_write(&s->lock);
			return;
		}

		/*
		 * Add a proper exception, and remove the
		 * inflight exception from the list.
		 */
		down_write(&s->lock);

		memcpy(e, &pe->e, sizeof(*e));
		insert_exception(&s->complete, e);
		remove_exception(&pe->e);

		/* Submit any pending write BHs */
		up_write(&s->lock);

		flush_buffers(pe->snapshot_bhs);
		DMDEBUG("Exception completed successfully.");

		/* Notify any interested parties */
		if (s->store.percent_full) {
			int pc = s->store.percent_full(&s->store);

			if (pc >= s->last_percent + WAKE_UP_PERCENT) {
				dm_table_event(s->table);
				s->last_percent = pc - pc % WAKE_UP_PERCENT;
			}
		}

	} else {
		/* Read/write error - snapshot is unusable */
		DMERR("Error reading/writing snapshot");

		down_write(&s->lock);
		s->store.drop_snapshot(&s->store);
		s->valid = 0;
		remove_exception(&pe->e);
		up_write(&s->lock);

		error_buffers(pe->snapshot_bhs);

		dm_table_event(s->table);
		DMDEBUG("Exception failed.");
	}

	if (list_empty(&pe->siblings))
		flush_buffers(pe->origin_bhs);
	else
		list_del(&pe->siblings);

	free_pending_exception(pe);
}

static void commit_callback(void *context, int success)
{
	struct pending_exception *pe = (struct pending_exception *) context;
	pending_complete(pe, success);
}

/*
 * Called when the copy I/O has finished.  kcopyd actually runs
 * this code so don't block.
 */
static void copy_callback(int err, void *context)
{
	struct pending_exception *pe = (struct pending_exception *) context;
	struct dm_snapshot *s = pe->snap;

	if (err)
		pending_complete(pe, 0);
	else
		/* Update the metadata if we are persistent */
		s->store.commit_exception(&s->store, &pe->e, commit_callback,
					  pe);
}

/*
 * Dispatches the copy operation to kcopyd.
 */
static inline void start_copy(struct pending_exception *pe)
{
	struct dm_snapshot *s = pe->snap;
	struct kcopyd_region src, dest;

	src.dev = s->origin->dev;
	src.sector = chunk_to_sector(s, pe->e.old_chunk);
	src.count = s->chunk_size;

	dest.dev = s->cow->dev;
	dest.sector = chunk_to_sector(s, pe->e.new_chunk);
	dest.count = s->chunk_size;

	if (!pe->started) {
		/* Hand over to kcopyd */
		kcopyd_copy(&src, &dest, copy_callback, pe);
		pe->started = 1;
	}
}

/*
 * Looks to see if this snapshot already has a pending exception
 * for this chunk, otherwise it allocates a new one and inserts
 * it into the pending table.
 */
static struct pending_exception *find_pending_exception(struct dm_snapshot *s,
							struct buffer_head *bh)
{
	struct exception *e;
	struct pending_exception *pe;
	chunk_t chunk = sector_to_chunk(s, bh->b_rsector);

	/*
	 * Is there a pending exception for this already ?
	 */
	e = lookup_exception(&s->pending, chunk);
	if (e) {
		/* cast the exception to a pending exception */
		pe = list_entry(e, struct pending_exception, e);

	} else {
		/* Create a new pending exception */
		pe = alloc_pending_exception();
		if (!pe) {
			DMWARN("Couldn't allocate pending exception.");
			return NULL;
		}

		pe->e.old_chunk = chunk;
		pe->origin_bhs = pe->snapshot_bhs = NULL;
		INIT_LIST_HEAD(&pe->siblings);
		pe->snap = s;
		pe->started = 0;

		if (s->store.prepare_exception(&s->store, &pe->e)) {
			free_pending_exception(pe);
			s->valid = 0;
			return NULL;
		}

		insert_exception(&s->pending, &pe->e);
	}

	return pe;
}

static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
				   struct buffer_head *bh)
{
	bh->b_rdev = s->cow->dev;
	bh->b_rsector = chunk_to_sector(s, e->new_chunk) +
	    (bh->b_rsector & s->chunk_mask);
}
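
/*
 * The remapped sector is the base of the chunk's new home on the COW
 * device plus the buffer's offset within the chunk, i.e.
 * new_sector = chunk_to_sector(new_chunk) + (old_sector % chunk_size);
 * the mask works because chunk_size is a power of 2.
 */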

static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw,
			void **map_context)
{
	struct exception *e;
	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
	int r = 1;
	chunk_t chunk;
	struct pending_exception *pe;

	chunk = sector_to_chunk(s, bh->b_rsector);

	/* Full snapshots are not usable */
	if (!s->valid)
		return -1;

	/*
	 * Write to snapshot - higher level takes care of RW/RO
	 * flags so we should only get this if we are
	 * writeable.
	 */
	if (rw == WRITE) {

		down_write(&s->lock);

		/* If the block is already remapped - use that, else remap it */
		e = lookup_exception(&s->complete, chunk);
		if (e)
			remap_exception(s, e, bh);
		else {
			pe = find_pending_exception(s, bh);

			if (!pe) {
				s->store.drop_snapshot(&s->store);
				s->valid = 0;
				r = -1;
			} else {
				queue_buffer(&pe->snapshot_bhs, bh);
				start_copy(pe);
				r = 0;
			}
		}

		up_write(&s->lock);

	} else {
		/*
		 * FIXME: this read path scares me because we
		 * always use the origin when we have a pending
		 * exception.  However I can't think of a
		 * situation where this is wrong - ejt.
		 */

		/* Do reads */
		down_read(&s->lock);

		/* See if it has been remapped */
		e = lookup_exception(&s->complete, chunk);
		if (e)
			remap_exception(s, e, bh);
		else
			bh->b_rdev = s->origin->dev;

		up_read(&s->lock);
	}

	return r;
}

static void list_merge(struct list_head *l1, struct list_head *l2)
{
	struct list_head *l1_n, *l2_p;

	l1_n = l1->next;
	l2_p = l2->prev;

	l1->next = l2;
	l2->prev = l1;

	l2_p->next = l1_n;
	l1_n->prev = l2_p;
}
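
/*
 * list_merge() splices two circular lists into a single ring: after the
 * call, walking 'next' pointers from l1 visits l2 and every node of
 * both original rings before returning to l1.  __origin_write() uses it
 * below to chain the sibling pending_exceptions of all snapshots that
 * share one origin chunk.
 */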

static int __origin_write(struct list_head *snapshots, struct buffer_head *bh)
{
	int r = 1;
	struct list_head *sl;
	struct dm_snapshot *snap;
	struct exception *e;
	struct pending_exception *pe, *last = NULL;
	chunk_t chunk;

	/* Do all the snapshots on this origin */
	list_for_each(sl, snapshots) {
		snap = list_entry(sl, struct dm_snapshot, list);

		/* Only deal with valid snapshots */
		if (!snap->valid)
			continue;

		down_write(&snap->lock);

		/*
		 * Remember, different snapshots can have
		 * different chunk sizes.
		 */
		chunk = sector_to_chunk(snap, bh->b_rsector);

		/*
		 * Check exception table to see if block
		 * is already remapped in this snapshot
		 * and trigger an exception if not.
		 */
		e = lookup_exception(&snap->complete, chunk);
		if (!e) {
			pe = find_pending_exception(snap, bh);
			if (!pe) {
				snap->store.drop_snapshot(&snap->store);
				snap->valid = 0;

			} else {
				if (last)
					list_merge(&pe->siblings,
						   &last->siblings);

				last = pe;
				r = 0;
			}
		}

		up_write(&snap->lock);
	}

	/*
	 * Now that we have a complete pe list we can start the copying.
	 */
	if (last) {
		pe = last;
		do {
			down_write(&pe->snap->lock);
			queue_buffer(&pe->origin_bhs, bh);
			start_copy(pe);
			up_write(&pe->snap->lock);
			pe = list_entry(pe->siblings.next,
					struct pending_exception, siblings);

		} while (pe != last);
	}

	return r;
}
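
/*
 * Two-phase structure: the first loop builds the ring of sibling
 * pending_exceptions under each snapshot's lock; only once every
 * affected snapshot has a pe do we walk the ring, queue the origin
 * buffer_head on each pe, and hand the chunk copies to kcopyd.  The
 * return value follows the map convention: 0 means the bh was deferred,
 * 1 means no snapshot needed it and it can be submitted immediately.
 */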

static int snapshot_status(struct dm_target *ti, status_type_t type,
			   char *result, int maxlen)
{
	struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
	char cow[16];
	char org[16];

	switch (type) {
	case STATUSTYPE_INFO:
		if (!snap->valid)
			snprintf(result, maxlen, "Invalid");
		else {
			if (snap->store.percent_full)
				snprintf(result, maxlen, "%d%%",
					 snap->store.percent_full(&snap->
								  store));
			else
				snprintf(result, maxlen, "Unknown");
		}
		break;

	case STATUSTYPE_TABLE:
		/*
		 * kdevname returns a static pointer so we need
		 * to make private copies if the output is to
		 * make sense.
		 */
		strncpy(cow, kdevname(snap->cow->dev), sizeof(cow));
		strncpy(org, kdevname(snap->origin->dev), sizeof(org));
		snprintf(result, maxlen, "%s %s %c %ld", org, cow,
			 snap->type, snap->chunk_size);
		break;
	}

	return 0;
}

/*
 * Called on a write from the origin driver.
 */
int do_origin(struct dm_dev *origin, struct buffer_head *bh)
{
	struct origin *o;
	int r;

	down_read(&_origins_lock);
	o = __lookup_origin(origin->dev);
	if (!o)
		BUG();

	r = __origin_write(&o->snapshots, bh);
	up_read(&_origins_lock);

	return r;
}

/*
 * Origin: maps a linear range of a device, with hooks for snapshotting.
 */

/*
 * Construct an origin mapping: <dev_path>
 * The context for an origin is merely a 'struct dm_dev *'
 * pointing to the real device.
 */
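/*
 * An illustrative table line (device name hypothetical):
 * "0 2097152 snapshot-origin /dev/vg0/lv0" passes all I/O through to
 * /dev/vg0/lv0 while triggering copy-outs to any registered snapshots
 * on writes.
 */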
static int origin_ctr(struct dm_target *ti, int argc, char **argv)
{
	int r;
	struct dm_dev *dev;

	if (argc != 1) {
		ti->error = "dm-origin: incorrect number of arguments";
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[0], 0, ti->len,
			  dm_table_get_mode(ti->table), &dev);
	if (r) {
		ti->error = "Cannot get target device";
		return r;
	}

	ti->private = dev;

	return 0;
}

static void origin_dtr(struct dm_target *ti)
{
	struct dm_dev *dev = (struct dm_dev *) ti->private;
	dm_put_device(ti, dev);
}

static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw,
		      void **map_context)
{
	struct dm_dev *dev = (struct dm_dev *) ti->private;
	bh->b_rdev = dev->dev;

	/* Only tell snapshots if this is a write */
	return (rw == WRITE) ? do_origin(dev, bh) : 1;
}

static int origin_status(struct dm_target *ti, status_type_t type,
			 char *result, int maxlen)
{
	struct dm_dev *dev = (struct dm_dev *) ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, "%s", kdevname(dev->dev));
		break;
	}

	return 0;
}

static struct target_type origin_target = {
	name:	"snapshot-origin",
	module:	THIS_MODULE,
	ctr:	origin_ctr,
	dtr:	origin_dtr,
	map:	origin_map,
	status:	origin_status,
};

static struct target_type snapshot_target = {
	name:	"snapshot",
	module:	THIS_MODULE,
	ctr:	snapshot_ctr,
	dtr:	snapshot_dtr,
	map:	snapshot_map,
	status:	snapshot_status,
};

int __init dm_snapshot_init(void)
{
	int r;

	r = dm_register_target(&snapshot_target);
	if (r) {
		DMERR("snapshot target register failed %d", r);
		return r;
	}

	r = dm_register_target(&origin_target);
	if (r < 0) {
		DMERR("Device mapper: Origin: register failed %d\n", r);
		goto bad1;
	}

	r = init_origin_hash();
	if (r) {
		DMERR("init_origin_hash failed.");
		goto bad2;
	}

	exception_cache = kmem_cache_create("dm-snapshot-ex",
					    sizeof(struct exception),
					    __alignof__(struct exception),
					    0, NULL, NULL);
	if (!exception_cache) {
		DMERR("Couldn't create exception cache.");
		r = -ENOMEM;
		goto bad3;
	}

	pending_cache =
	    kmem_cache_create("dm-snapshot-in",
			      sizeof(struct pending_exception),
			      __alignof__(struct pending_exception),
			      0, NULL, NULL);
	if (!pending_cache) {
		DMERR("Couldn't create pending cache.");
		r = -ENOMEM;
		goto bad4;
	}

	pending_pool = mempool_create(128, mempool_alloc_slab,
				      mempool_free_slab, pending_cache);
	if (!pending_pool) {
		DMERR("Couldn't create pending pool.");
		r = -ENOMEM;
		goto bad5;
	}

	return 0;

 bad5:
	kmem_cache_destroy(pending_cache);
 bad4:
	kmem_cache_destroy(exception_cache);
 bad3:
	exit_origin_hash();
 bad2:
	dm_unregister_target(&origin_target);
 bad1:
	dm_unregister_target(&snapshot_target);
	return r;
}

void dm_snapshot_exit(void)
{
	int r;

	r = dm_unregister_target(&snapshot_target);
	if (r)
		DMERR("snapshot unregister failed %d", r);

	r = dm_unregister_target(&origin_target);
	if (r)
		DMERR("origin unregister failed %d", r);

	exit_origin_hash();
	mempool_destroy(pending_pool);
	kmem_cache_destroy(pending_cache);
	kmem_cache_destroy(exception_cache);
}

/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-file-style: "linux"
 * End:
 */