diff -Naur grub-2.02~beta2_/grub-core/fs/zfs/zfs.c grub-2.02~beta2/grub-core/fs/zfs/zfs.c --- grub-2.02~beta2_/grub-core/fs/zfs/zfs.c 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/grub-core/fs/zfs/zfs.c 2015-10-03 16:51:11.000000000 -0700 @@ -1,8 +1,10 @@ /* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004,2009,2010,2011 Free Software Foundation, Inc. - * Copyright 2010 Sun Microsystems, Inc. + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2015 by Toomas Soome. * * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,16 +20,14 @@ * along with GRUB. If not, see . */ /* - * The zfs plug-in routines for GRUB are: - * - * zfs_mount() - locates a valid uberblock of the root pool and reads - * in its MOS at the memory address MOS. - * - * zfs_open() - locates a plain file object by following the MOS - * and places its dnode at the memory address DNODE. - * - * zfs_read() - read in the data blocks pointed by the DNODE. + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. */ #include @@ -59,6 +59,7 @@ GRUB_MOD_LICENSE ("GPLv3+"); #define ZPOOL_PROP_BOOTFS "bootfs" +#define BOOTFSNAME_SIZE 256 /* * For nvlist manipulation. 
(from nvpair.h) @@ -76,7 +77,16 @@ static grub_dl_t my_mod; #endif +#define P2ALIGN_TYPED(x, align, type) ((type)(x) & -(type)(align)) #define P2PHASE(x, align) ((x) & ((align) - 1)) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) +#ifndef NBBY +#define NBBY 8 +#endif + +static grub_err_t vdev_disk_read_rootlabel(grub_device_t, char **); +static grub_err_t vdev_attach(struct grub_zfs_data *, grub_device_t, char *, + char *, int); static inline grub_disk_addr_t DVA_OFFSET_TO_PHYS_SECTOR (grub_disk_addr_t offset) @@ -158,7 +168,6 @@ */ extern grub_err_t lzjb_decompress (void *, void *, grub_size_t, grub_size_t); - extern grub_err_t lz4_decompress (void *, void *, grub_size_t, grub_size_t); typedef grub_err_t zfs_decomp_func_t (void *s_start, void *d_start, @@ -179,10 +188,10 @@ * Information about each checksum function. */ typedef struct zio_checksum_info { - zio_checksum_t *ci_func; /* checksum function for each byteorder */ - int ci_correctable; /* number of correctable bits */ - int ci_eck; /* uses zio embedded checksum? */ - const char *ci_name; /* descriptive name */ + zio_checksum_t *ci_func; /* checksum function for each byteorder */ + int ci_correctable; /* number of correctable bits */ + int ci_eck; /* uses zio embedded checksum? */ + const char *ci_name; /* descriptive name */ } zio_checksum_info_t; typedef struct dnode_end @@ -194,10 +203,11 @@ struct grub_zfs_device_desc { enum { DEVICE_LEAF, DEVICE_MIRROR, DEVICE_RAIDZ } type; + enum { DEVICE_OK, DEVICE_ERROR } dev_state; grub_uint64_t id; grub_uint64_t guid; - unsigned ashift; - unsigned max_children_ashift; + grub_uint64_t txg; /* label transaction group */ + char *config; /* nvlist from label */ /* Valid only for non-leafs. */ unsigned n_children; @@ -206,11 +216,15 @@ /* Valid only for RAIDZ. */ unsigned nparity; + /* Valid for all */ + unsigned ashift; + /* Valid only for leaf devices. 
*/ + struct grub_zfs_device_desc *top_vdev; grub_device_t dev; - grub_disk_addr_t vdev_phys_sector; + char *dev_name; + int original; /* we dont close original */ uberblock_t current_uberblock; - int original; }; struct subvolume @@ -229,6 +243,8 @@ struct grub_zfs_data { + int zcached; /* the value should be zero if no cache available */ + /* cache for a file block of the currently zfs_open()-ed file */ char *file_buf; grub_uint64_t file_start; @@ -248,14 +264,112 @@ struct grub_zfs_device_desc *devices_attached; unsigned n_devices_attached; unsigned n_devices_allocated; - struct grub_zfs_device_desc *device_original; uberblock_t current_uberblock; int mounted; - grub_uint64_t guid; + + grub_uint64_t guid; /* pool guid */ + grub_uint64_t state; /* pool state */ + char *label; /* pool label name */ }; +typedef struct mirror_child +{ + struct grub_zfs_device_desc *mc_vd; + grub_uint64_t mc_offset; + int mc_error; + grub_uint8_t mc_tried; + grub_uint8_t mc_skipped; + grub_uint8_t mc_speculative; +} mirror_child_t; + +typedef struct mirror_map +{ + void *mm_buf; + grub_size_t mm_size; + grub_uint64_t mm_offset; + int mm_children; + int mm_replacing; + int mm_preferred; + int mm_root; + mirror_child_t mm_child[1]; +} mirror_map_t; + +/* + * The following are taken straight from usr/src/uts/common/fs/zfs/vdev_raidz.c + * If they change there, they need to be changed here. + * + * a map of columns returned for a given offset and size + */ +typedef struct raidz_col +{ + grub_uint64_t rc_devidx; /* child device index for I/O */ + grub_uint64_t rc_offset; /* device offset */ + grub_uint64_t rc_size; /* I/O size */ + void *rc_data; /* I/O data */ + void *rc_gdata; /* used to store the "good" version */ + int rc_error; /* I/O error for this device */ + grub_uint8_t rc_tried; /* Did we attempt this I/O column? */ + grub_uint8_t rc_skipped; /* Did we skip this I/O column? 
*/ +} raidz_col_t; + +typedef struct raidz_map +{ + grub_uint64_t rm_cols; /* Regular column count */ + grub_uint64_t rm_scols; /* Count including skipped columns */ + grub_uint64_t rm_bigcols; /* Number of oversized columns */ + grub_uint64_t rm_asize; /* Actual total I/O size */ + grub_uint64_t rm_missingdata; /* Count of missing data devices */ + grub_uint64_t rm_missingparity; /* Count of missing parity devices */ + grub_uint64_t rm_firstdatacol; /* First data column/parity count */ + grub_uint64_t rm_nskip; /* Skipped sectors for padding */ + grub_uint64_t rm_skipstart; /* Column index of padding start */ + void *rm_datacopy; /* rm_asize-buffer of copied data */ + grub_addr_t rm_reports; /* # of referencing checksum reports */ + grub_uint8_t rm_freed; /* map no longer has referencing ZIO */ + grub_uint8_t rm_ecksuminjected; /* checksum error was injected */ + raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ +} raidz_map_t; + +#define VDEV_RAIDZ_P 0 +#define VDEV_RAIDZ_Q 1 +#define VDEV_RAIDZ_R 2 +/* +#define VDEV_RAIDZ_MUL_2(x) (((x) << 1) ^ (((x) & 0x80) ? 0x1d : 0)) +#define VDEV_RAIDZ_MUL_4(x) (VDEV_RAIDZ_MUL_2(VDEV_RAIDZ_MUL_2(x))) +*/ +#define VDEV_RAIDZ_64MUL_2(x, mask) \ +{ \ + (mask) = (x) & 0x8080808080808080ULL; \ + (mask) = ((mask) << 1) - ((mask) >> 7); \ + (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \ + ((mask) & 0x1d1d1d1d1d1d1d1dULL); \ +} + +#define VDEV_RAIDZ_64MUL_4(x, mask) \ +{ \ + VDEV_RAIDZ_64MUL_2((x), mask); \ + VDEV_RAIDZ_64MUL_2((x), mask); \ +} + +/* cache list for ZFS mount */ +struct zfs_mount_cache +{ + char *zcache_pool_name; + grub_uint64_t zcache_pool_guid; + struct grub_zfs_data *zcache_zfs_data; + struct zfs_mount_cache *next; +} *zfs_mount_cache_list; + +/* cache list for non-zfs disk dev */ +struct zfs_dev_notzfs +{ + char *dev_name; + unsigned long dev_id; + struct zfs_dev_notzfs *next; +} *zfs_dev_notzfs_list; + /* Context for grub_zfs_dir. */ struct grub_zfs_dir_ctx { @@ -279,27 +393,22 @@ * read. 
Note that features that are only required for write do not need * to be listed here since grub opens pools in read-only mode. */ -#define MAX_SUPPORTED_FEATURE_STRLEN 50 static const char *spa_feature_names[] = { - "org.illumos:lz4_compress",NULL + "org.illumos:lz4_compress", + "com.delphix:hole_birth", + "com.delphix:extensible_dataset", + "com.delphix:embedded_data", + "org.open-zfs:large_blocks", + NULL }; -static int -check_feature(const char *name, grub_uint64_t val, struct grub_zfs_dir_ctx *ctx); -static int -check_mos_features(dnode_phys_t *mosmdn_phys,grub_zfs_endian_t endian,struct grub_zfs_data* data ); - static grub_err_t zlib_decompress (void *s, void *d, grub_size_t slen, grub_size_t dlen) { - if (grub_zlib_decompress (s, slen, 0, d, dlen) == (grub_ssize_t) dlen) - return GRUB_ERR_NONE; - - if (!grub_errno) - grub_error (GRUB_ERR_BAD_COMPRESSED_DATA, - "premature end of compressed"); - return grub_errno; + if (grub_zlib_decompress (s, slen, 0, d, dlen) < 0) + return grub_errno; + return GRUB_ERR_NONE; } static grub_err_t @@ -356,6 +465,12 @@ static grub_err_t zio_read_data (blkptr_t * bp, grub_zfs_endian_t endian, void *buf, struct grub_zfs_data *data); +static grub_err_t zio_read_common (blkptr_t * bp, dva_t *dva, + grub_zfs_endian_t endian, void *buf, + struct grub_zfs_data *data); +static const char * nvlist_next_nvpair (const char *nvl, const char *nvpair); +static char *nvpair_name (const char *nvp); +static grub_err_t scan_devices (struct grub_zfs_data *, char *, grub_device_t); /* * Our own version of log2(). Same thing as highbit()-1. 
@@ -371,7 +486,7 @@ num = num >> 1; } - return i; + return (i); } /* Checksum Functions */ @@ -391,11 +506,11 @@ {zio_checksum_off, 0, 0, "off"}, {zio_checksum_SHA256, 1, 1, "label"}, {zio_checksum_SHA256, 1, 1, "gang_header"}, - {NULL, 0, 0, "zilog"}, + {fletcher_2, 0, 1, "zilog"}, {fletcher_2, 0, 0, "fletcher2"}, {fletcher_4, 1, 0, "fletcher4"}, {zio_checksum_SHA256, 1, 0, "SHA256"}, - {NULL, 0, 0, "zilog2"}, + {fletcher_4, 0, 1, "zilog2"}, {zio_checksum_SHA256, 1, 0, "SHA256+MAC"}, }; @@ -418,7 +533,7 @@ { grub_dprintf ("zfs", "unknown checksum function %d\n", checksum); return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, - "unknown checksum function %d", checksum); + N_("unknown checksum function %d"), checksum); } if (ci->ci_eck) @@ -479,19 +594,19 @@ if (grub_zfs_to_cpu64 (ub1->ub_txg, ub1_endian) < grub_zfs_to_cpu64 (ub2->ub_txg, ub2_endian)) - return -1; + return (1); if (grub_zfs_to_cpu64 (ub1->ub_txg, ub1_endian) > grub_zfs_to_cpu64 (ub2->ub_txg, ub2_endian)) - return 1; + return (-1); if (grub_zfs_to_cpu64 (ub1->ub_timestamp, ub1_endian) < grub_zfs_to_cpu64 (ub2->ub_timestamp, ub2_endian)) - return -1; + return (1); if (grub_zfs_to_cpu64 (ub1->ub_timestamp, ub1_endian) > grub_zfs_to_cpu64 (ub2->ub_timestamp, ub2_endian)) - return 1; + return (-1); - return 0; + return (0); } /* @@ -502,10 +617,8 @@ * */ static grub_err_t -uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset, - grub_size_t s) +uberblock_verify (uberblock_t *uber, grub_uint64_t offset, int ashift) { - uberblock_t *uber = &ub->ubp_uberblock; grub_err_t err; grub_zfs_endian_t endian = GRUB_ZFS_UNKNOWN_ENDIAN; zio_cksum_t zc; @@ -520,1316 +633,1794 @@ endian = GRUB_ZFS_BIG_ENDIAN; if (endian == GRUB_ZFS_UNKNOWN_ENDIAN) - return grub_error (GRUB_ERR_BAD_FS, "invalid uberblock magic"); + return grub_error (GRUB_ERR_BAD_FS, N_("invalid uberblock magic")); grub_memset (&zc, 0, sizeof (zc)); zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian); err = zio_checksum_verify (zc, 
ZIO_CHECKSUM_LABEL, endian, - (char *) ub, s); + (char *) uber, VDEV_UBERBLOCK_SIZE(ashift)); return err; } /* - * Find the best uberblock. - * Return: - * Success - Pointer to the best uberblock. - * Failure - NULL + * Check if this vdev is online and is in a good state. */ -static uberblock_phys_t * -find_bestub (uberblock_phys_t * ub_array, - const struct grub_zfs_device_desc *desc) +int +grub_zfs_vdev_validate (const char *nv) { - uberblock_phys_t *ubbest = NULL, *ubptr; - int i; - grub_disk_addr_t offset; - grub_err_t err = GRUB_ERR_NONE; - int ub_shift; - - ub_shift = desc->ashift; - if (ub_shift < VDEV_UBERBLOCK_SHIFT) - ub_shift = VDEV_UBERBLOCK_SHIFT; + grub_uint64_t ival = 0; - for (i = 0; i < (VDEV_UBERBLOCK_RING >> ub_shift); i++) + if (grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_OFFLINE, &ival) && ival) + { + grub_dprintf ("zfs", "vdev_validate: ZPOOL_CONFIG_OFFLINE\n"); + return (1); + } + if ((grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_FAULTED, &ival) && ival) && + !(grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_DEGRADED, &ival) && ival)) + { + grub_dprintf ("zfs", "vdev_validate: ZPOOL_CONFIG_FAULTED\n"); + return (1); + } + if (grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_REMOVED, &ival) && ival) { - offset = (desc->vdev_phys_sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE - + (i << ub_shift); + grub_dprintf ("zfs", "vdev_validate: ZPOOL_CONFIG_REMOVED\n"); + return (1); - ubptr = (uberblock_phys_t *) ((grub_properly_aligned_t *) ub_array - + ((i << ub_shift) - / sizeof (grub_properly_aligned_t))); - err = uberblock_verify (ubptr, offset, 1 << ub_shift); - if (err) - { - grub_errno = GRUB_ERR_NONE; - continue; - } - if (ubbest == NULL - || vdev_uberblock_compare (&(ubptr->ubp_uberblock), - &(ubbest->ubp_uberblock)) > 0) - ubbest = ubptr; } - if (!ubbest) - grub_errno = err; - return ubbest; + return (0); } -static inline grub_size_t -get_psize (blkptr_t * bp, grub_zfs_endian_t endian) +static int +check_feature(const char *name, 
grub_uint64_t val, + struct grub_zfs_dir_ctx *ctx __attribute__((unused))) { - return ((((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) >> 16) & 0xffff) + 1) - << SPA_MINBLOCKSHIFT); -} + int i; -static grub_uint64_t -dva_get_offset (const dva_t *dva, grub_zfs_endian_t endian) -{ - grub_dprintf ("zfs", "dva=%llx, %llx\n", - (unsigned long long) dva->dva_word[0], - (unsigned long long) dva->dva_word[1]); - return grub_zfs_to_cpu64 ((dva)->dva_word[1], - endian) << SPA_MINBLOCKSHIFT; + if (val == 0) /* value is not set */ + return (0); + if(name[0] == 0) /* empty name */ + return (0); + + for (i = 0; spa_feature_names[i] != NULL; i++ ) + if (grub_strcmp(name, spa_feature_names[i]) == 0) + { + grub_dprintf ("zfs", "check_feature: %s ok\n", name); + return (0); + } + return grub_error(GRUB_ERR_NOT_IMPLEMENTED_YET, "unknown feature: %s", name); } -static grub_err_t -zfs_fetch_nvlist (struct grub_zfs_device_desc *diskdesc, char **nvlist) +struct scan_devices_ctx { + struct grub_zfs_data *data; + char *name; + grub_device_t dev; + int missing_vdev; + int missing_children; +}; + +/* + * grub_device_iterate() hook used by scan_devices(). + * + * Recounts the missing top-level vdevs and children, skips device names + * already cached in zfs_dev_notzfs_list, then opens the disk (reusing the + * caller's device when name matches ctx->name) and reads its root label. + * A disk whose label cannot be read is added to zfs_dev_notzfs_list; a disk + * whose label carries a different pool GUID is ignored; anything else is + * handed to vdev_attach(). + * + * Returns 1 once no vdev or child is missing, 0 otherwise. + */ +static int +scan_devices_iter(const char *name, void *hook_data) { + struct scan_devices_ctx *ctx = hook_data; + struct grub_zfs_data *data = ctx->data; + struct zfs_dev_notzfs *dev_notzfs; + grub_device_t dev; + grub_uint64_t u; + unsigned i, j; + int original = 0; + char *dev_name; + char *config; grub_err_t err; - *nvlist = 0; - - if (!diskdesc->dev) - return grub_error (GRUB_ERR_BUG, "member drive unknown"); + grub_dprintf("zfs", "scan_devices: %s\n", name); - *nvlist = grub_malloc (VDEV_PHYS_SIZE); - - /* Read in the vdev name-value pair list (112K). 
*/ - err = grub_disk_read (diskdesc->dev->disk, diskdesc->vdev_phys_sector, 0, - VDEV_PHYS_SIZE, *nvlist); - if (err) + /* scan our config first */ + ctx->missing_vdev = data->n_devices_allocated - data->n_devices_attached; + ctx->missing_children = 0; + for (i = 0; i < data->n_devices_allocated; i++) { - grub_free (*nvlist); - *nvlist = 0; - return err; - } - return GRUB_ERR_NONE; -} - -static grub_err_t -fill_vdev_info_real (struct grub_zfs_data *data, - const char *nvlist, - struct grub_zfs_device_desc *fill, - struct grub_zfs_device_desc *insert, - int *inserted, - unsigned ashift) -{ - char *type; - - type = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_TYPE); - - if (!type) - return grub_errno; + if (data->devices_attached[i].guid == 0) /* not attached */ + continue; - if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "id", &(fill->id))) - { - grub_free (type); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id"); + for (j = 0; j < data->devices_attached[i].n_children; j++) + { + if (data->devices_attached[i].children[j].guid == 0) + ctx->missing_children++; + } } - if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "guid", &(fill->guid))) + if (ctx->missing_vdev == 0 && ctx->missing_children == 0) { - grub_free (type); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id"); + /* nothing to do */ + return 1; } - { - grub_uint64_t par; - if (grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par)) - fill->ashift = par; - else if (ashift != 0xffffffff) - fill->ashift = ashift; - else + for (dev_notzfs = zfs_dev_notzfs_list; + dev_notzfs != NULL; dev_notzfs = dev_notzfs->next) + if (grub_strcmp(name, dev_notzfs->dev_name) == 0) { - grub_free (type); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find ashift"); + grub_dprintf("zfs", "scan_devices_iter: found cached non-zfs " + "disk dev: %s\n", dev_notzfs->dev_name); + return 0; } - } - fill->max_children_ashift = 0; + if (grub_strcmp(name, ctx->name) == 0) - if (grub_strcmp (type, VDEV_TYPE_DISK) == 0 - || 
grub_strcmp (type, VDEV_TYPE_FILE) == 0) { - fill->type = DEVICE_LEAF; + dev = ctx->dev; + dev_name = ctx->name; + original = 1; + } + else + { + dev = grub_device_open (name); + if (!dev) + return 0; + if (!dev->disk) - if (!fill->dev && fill->guid == insert->guid) { - fill->dev = insert->dev; - fill->vdev_phys_sector = insert->vdev_phys_sector; - fill->current_uberblock = insert->current_uberblock; - fill->original = insert->original; - if (!data->device_original) - data->device_original = fill; - insert->ashift = fill->ashift; - *inserted = 1; + grub_device_close (dev); + return 0; } + dev_name = grub_strdup(name); + if (!dev_name) /* out of memory: skip this disk instead of attaching a NULL name */ + { + grub_device_close (dev); + return 0; + } - grub_free (type); - return GRUB_ERR_NONE; } - if (grub_strcmp (type, VDEV_TYPE_MIRROR) == 0 - || grub_strcmp (type, VDEV_TYPE_RAIDZ) == 0) + if ((err = vdev_disk_read_rootlabel (dev, &config)) != GRUB_ERR_NONE) { - int nelm, i; + dev_notzfs = grub_malloc(sizeof (struct zfs_dev_notzfs)); + if (dev_notzfs) - if (grub_strcmp (type, VDEV_TYPE_MIRROR) == 0) - fill->type = DEVICE_MIRROR; - else { - grub_uint64_t par; - fill->type = DEVICE_RAIDZ; - if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "nparity", &par)) - { - grub_free (type); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz parity"); - } - fill->nparity = par; + dev_notzfs->dev_name = grub_strdup(name); + if (dev_notzfs->dev_name) + { + dev_notzfs->dev_id = dev->disk->id; + dev_notzfs->next = zfs_dev_notzfs_list; + zfs_dev_notzfs_list = dev_notzfs; + } + else + grub_free (dev_notzfs); /* never link an entry whose name is NULL: the list walk above strcmp()s it */ } + if (!original) - nelm = grub_zfs_nvlist_lookup_nvlist_array_get_nelm (nvlist, - ZPOOL_CONFIG_CHILDREN); - - if (nelm <= 0) { - grub_free (type); - return grub_error (GRUB_ERR_BAD_FS, "incorrect mirror VDEV"); + grub_device_close (dev); + grub_free (dev_name); } + return 0; + } - if (!fill->children) + u = 0; + if (grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_POOL_GUID, &u) + && data->guid != u) + { + grub_free (config); + if (!original) { - fill->n_children = nelm; - - fill->children = grub_zalloc 
(fill->n_children - * sizeof (fill->children[0])); + grub_device_close (dev); + grub_free (dev_name); } - for (i = 0; i < nelm; i++) - { - char *child; - grub_err_t err; + return 0; + } - child = grub_zfs_nvlist_lookup_nvlist_array - (nvlist, ZPOOL_CONFIG_CHILDREN, i); + grub_dprintf ("zfs", "disk from our pool: %s\n", name); - err = fill_vdev_info_real (data, child, &fill->children[i], insert, - inserted, fill->ashift); + err = vdev_attach(data, dev, config, dev_name, original); + grub_free (config); - grub_free (child); + /* check the vdev tree again */ + ctx->missing_vdev = data->n_devices_allocated - data->n_devices_attached; + ctx->missing_children = 0; + for (i = 0; i < data->n_devices_allocated; i++) + { + if (data->devices_attached[i].guid == 0) /* not attached */ + continue; - if (err) - { - grub_free (type); - return err; - } - if (fill->children[i].ashift > fill->max_children_ashift) - fill->max_children_ashift = fill->children[i].ashift; + for (j = 0; j < data->devices_attached[i].n_children; j++) + { + if (data->devices_attached[i].children[j].guid == 0) + ctx->missing_children++; } - grub_free (type); - return GRUB_ERR_NONE; } - grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, "vdev %s isn't supported", type); - grub_free (type); - return grub_errno; + if (ctx->missing_vdev == 0 && ctx->missing_children == 0) + { + /* done */ + return 1; + } + + return 0; } +/* + * we iterate over all disks grub_device_iterate() will provide, till + * we have filled all vdevs and children. 
+ */ +/* + * Runs scan_devices_iter() over every device offered by + * grub_device_iterate(). Returns GRUB_ERR_NONE when the hook left both + * missing counters at zero, otherwise the bare GRUB_ERR_BAD_FS code + * (grub_error() is not called here). + * + * NOTE(review): the counters start at zero and are only updated inside the + * hook, so if the hook is never invoked this reports success; confirm that + * grub_device_iterate() always visits at least one device. + */ static grub_err_t -fill_vdev_info (struct grub_zfs_data *data, - char *nvlist, struct grub_zfs_device_desc *diskdesc, - int *inserted) +scan_devices (struct grub_zfs_data *data, char *name, grub_device_t dev) { - grub_uint64_t id; - unsigned i; + struct scan_devices_ctx ctx; - *inserted = 0; + ctx.data = data; + ctx.name = name; /* disk name from zfs_mount() caller */ + ctx.dev = dev; /* disk device from zfs_mount() caller */ + ctx.missing_vdev = 0; + ctx.missing_children = 0; - if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "id", &id)) - return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id"); + grub_device_iterate (scan_devices_iter, (void *) &ctx); - for (i = 0; i < data->n_devices_attached; i++) - if (data->devices_attached[i].id == id) - return fill_vdev_info_real (data, nvlist, &data->devices_attached[i], - diskdesc, inserted, 0xffffffff); - - data->n_devices_attached++; - if (data->n_devices_attached > data->n_devices_allocated) - { - void *tmp; - data->n_devices_allocated = 2 * data->n_devices_attached + 1; - data->devices_attached - = grub_realloc (tmp = data->devices_attached, - data->n_devices_allocated - * sizeof (data->devices_attached[0])); - if (!data->devices_attached) - { - data->devices_attached = tmp; - return grub_errno; - } - } + /* scan is done, fix device states in vdevs */ - grub_memset (&data->devices_attached[data->n_devices_attached - 1], - 0, sizeof (data->devices_attached[data->n_devices_attached - 1])); + if (ctx.missing_vdev != 0 || ctx.missing_children != 0) + { + return GRUB_ERR_BAD_FS; + } - return fill_vdev_info_real (data, nvlist, - &data->devices_attached[data->n_devices_attached - 1], - diskdesc, inserted, 0xffffffff); + return GRUB_ERR_NONE; } +/* Powers of 2 in the Galois field. 
*/ +static const grub_uint8_t vdev_raidz_pow2[256] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, + 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, + 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, + 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, + 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, + 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, + 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, + 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, + 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, + 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, + 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, + 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, + 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, + 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, + 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, + 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, + 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, + 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, + 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, + 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, + 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, + 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, + 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, + 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, + 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; + +/* Logs of 2 in the Galois field. 
*/ +static const grub_uint8_t vdev_raidz_log2[256] = { + 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, + 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, + 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, + 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, + 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, + 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, + 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, + 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, + 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, + 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, + 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, + 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, + 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, + 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, + 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, + 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, + 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, + 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, + 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, + 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, + 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, + 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, + 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, + 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, + 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, + 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, +}; + /* - * For a given XDR packed nvlist, verify the first 4 bytes and move on. 
- * - * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : - * - * encoding method/host endian (4 bytes) - * nvl_version (4 bytes) - * nvl_nvflag (4 bytes) - * encoded nvpairs: - * encoded size of the nvpair (4 bytes) - * decoded size of the nvpair (4 bytes) - * name string size (4 bytes) - * name string data (sizeof(NV_ALIGN4(string)) - * data type (4 bytes) - * # of elements in the nvpair (4 bytes) - * data - * 2 zero's for the last nvpair - * (end of the entire list) (8 bytes) - * + * Multiply a given number by 2 raised to the given power. */ +static grub_uint8_t +vdev_raidz_exp2(unsigned a, int exp) +{ + if (a == 0) + return (0); + + exp += vdev_raidz_log2[a]; + if (exp > 255) + exp -= 255; + + return (vdev_raidz_pow2[exp]); +} /* - * The nvlist_next_nvpair() function returns a handle to the next nvpair in the - * list following nvpair. If nvpair is NULL, the first pair is returned. If - * nvpair is the last pair in the nvlist, NULL is returned. + * vdev_raidz_map_get() is hacked from vdev_raidz_map_alloc() in + * usr/src/uts/common/fs/zfs/vdev_raidz.c. If that routine changes, + * this might also need changing. */ -static const char * -nvlist_next_nvpair (const char *nvl, const char *nvpair) -{ - const char *nvp; - int encode_size; - int name_len; - if (nvl == NULL) - return NULL; - if (nvpair == NULL) - { - /* skip over header, nvl_version and nvl_nvflag */ - nvpair = nvl + 4 * 3; - } - else +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#endif +#ifndef roundup +#define roundup(x, y) (grub_divmod64 ( (x)+((y)-1), (y), NULL) *(y)) +#endif +#ifndef offsetof +#define offsetof(s, m) ((grub_size_t)(&(((s *)0)->m))) +#endif + +static raidz_map_t * +vdev_raidz_map_alloc(void *data, grub_uint64_t size, grub_uint64_t offset, + grub_uint64_t unit_shift, grub_uint64_t dcols, + grub_uint64_t nparity) +{ + raidz_map_t *rm; + grub_uint64_t b = offset >> unit_shift; + grub_uint64_t s = size >> unit_shift; + grub_uint64_t f, o, q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; + + o = grub_divmod64(b, dcols, NULL) << unit_shift; + (void) grub_divmod64(b, dcols, &f); + q = grub_divmod64(s, dcols - nparity, NULL); + r = s - q * (dcols - nparity); + bc = (r == 0 ? 0 : r + nparity); + tot = s + nparity * (q + (r == 0 ? 0 : 1)); + + if (q == 0) { - /* skip to the next nvpair */ - encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvpair)); - nvpair += encode_size; - /*If encode_size equals 0 nvlist_next_nvpair would return - * the same pair received in input, leading to an infinite loop. - * If encode_size is less than 0, this will move the pointer - * backwards, *possibly* examinining two times the same nvpair - * and potentially getting into an infinite loop. 
*/ - if(encode_size <= 0) - { - grub_dprintf ("zfs", "nvpair with size <= 0\n"); - grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); - return NULL; - } + acols = bc; + scols = MIN(dcols, roundup(bc, nparity + 1)); } - /* 8 bytes of 0 marks the end of the list */ - if (grub_get_unaligned64 (nvpair) == 0) - return NULL; - /*consistency checks*/ - if (nvpair + 4 * 3 >= nvl + VDEV_PHYS_SIZE) + else { - grub_dprintf ("zfs", "nvlist overflow\n"); - grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); - return NULL; + acols = dcols; + scols = dcols; } - encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvpair)); - nvp = nvpair + 4*2; - name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); - nvp += 4; + rm = grub_malloc(offsetof(raidz_map_t, rm_col[scols])); - nvp = nvp + ((name_len + 3) & ~3); // align - if (nvp + 4 >= nvl + VDEV_PHYS_SIZE - || encode_size < 0 - || nvp + 4 + encode_size > nvl + VDEV_PHYS_SIZE) + if (rm == NULL) { - grub_dprintf ("zfs", "nvlist overflow\n"); - grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); return NULL; } - /* end consistency checks */ - return nvpair; -} + rm->rm_cols = acols; + rm->rm_scols = scols; + rm->rm_bigcols = bc; + rm->rm_skipstart = bc; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; + rm->rm_firstdatacol = nparity; + rm->rm_datacopy = NULL; + rm->rm_reports = 0; + rm->rm_freed = 0; + rm->rm_ecksuminjected = 0; + + asize = 0; + + for (c = 0; c < scols; c++) + { + col = f + c; + coff = o; + if (col >= dcols) + { + col -= dcols; + coff += 1ULL << unit_shift; + } + rm->rm_col[c].rc_devidx = col; + rm->rm_col[c].rc_offset = coff; + rm->rm_col[c].rc_data = NULL; + rm->rm_col[c].rc_gdata = NULL; + rm->rm_col[c].rc_error = 0; + rm->rm_col[c].rc_tried = 0; + rm->rm_col[c].rc_skipped = 0; + + if (c >= acols) + rm->rm_col[c].rc_size = 0; + else if (c < bc) + rm->rm_col[c].rc_size = (q + 1) << unit_shift; + else + rm->rm_col[c].rc_size = q << unit_shift; -/* - * This function returns 0 on success and 1 on failure. 
On success, a string - * containing the name of nvpair is saved in buf. - */ -static int -nvpair_name (const char *nvp, char **buf, grub_size_t *buflen) -{ - /* skip over encode/decode size */ - nvp += 4 * 2; - - *buf = (char *) (nvp + 4); - *buflen = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + asize += rm->rm_col[c].rc_size; + } - return 0; -} + rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift); + rm->rm_nskip = roundup(tot, nparity + 1) - tot; -/* - * This function retrieves the value of the nvpair in the form of enumerated - * type data_type_t. - */ -static int -nvpair_type (const char *nvp) -{ - int name_len, type; + /* + * Allocate one buffer per parity column. On failure release the + * buffers obtained so far together with the map and return NULL, + * the same result the caller already gets when the map itself + * cannot be allocated. + */ + for (c = 0; c < rm->rm_firstdatacol; c++) + { + rm->rm_col[c].rc_data = grub_malloc (rm->rm_col[c].rc_size); + if (rm->rm_col[c].rc_data == NULL) + { + while (c-- > 0) + grub_free (rm->rm_col[c].rc_data); + grub_free (rm); + return NULL; + } + } - /* skip over encode/decode size */ - nvp += 4 * 2; + rm->rm_col[c].rc_data = data; - /* skip over name_len */ - name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); - nvp += 4; + for (c = c + 1; c < acols; c++) + rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + + rm->rm_col[c - 1].rc_size; - /* skip over name */ - nvp = nvp + ((name_len + 3) & ~3); /* align */ + /* + * If all data stored spans all columns, there's a danger that parity + * will always be on the same device and, since parity isn't read + * during normal operation, that that device's I/O bandwidth won't be + * used effectively. We therefore switch the parity every 1MB. + * + * ... at least that was, ostensibly, the theory. As a practical + * matter unless we juggle the parity between all devices evenly, we + * won't see any benefit. Further, occasional writes that aren't a + * multiple of the LCM of the number of children and the minimum + * stripe width are sufficient to avoid pessimal behavior. + * Unfortunately, this decision created an implicit on-disk format + * requirement that we need to support for all eternity, but only + * for single-parity RAID-Z. 
+ * + * If we intend to skip a sector in the zeroth column for padding + * we must make sure to note this swap. We will never intend to + * skip the first column since at least one data and one parity + * column must appear in each row. + */ - type = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + if (rm->rm_firstdatacol == 1 && (offset & (1ULL << 20))) + { + devidx = rm->rm_col[0].rc_devidx; + o = rm->rm_col[0].rc_offset; + rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; + rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; + rm->rm_col[1].rc_devidx = devidx; + rm->rm_col[1].rc_offset = o; - return type; + if (rm->rm_skipstart == 0) + rm->rm_skipstart = 1; + } + + return (rm); } -static int -nvpair_value (const char *nvp,char **val, - grub_size_t *size_out, grub_size_t *nelm_out) +static void +vdev_raidz_map_free(raidz_map_t *rm) { - int name_len,nelm,encode_size; + grub_uint64_t c; - /* skip over encode/decode size */ - encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvp)); - nvp += 8; + for (c = 0; c < rm->rm_firstdatacol; c++) + { + grub_free (rm->rm_col[c].rc_data); - /* skip over name_len */ - name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); - nvp += 4; - - /* skip over name */ - nvp = nvp + ((name_len + 3) & ~3); /* align */ - - /* skip over type */ - nvp += 4; - nelm = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); - nvp +=4; - if (nelm < 1) - { - grub_error (GRUB_ERR_BAD_FS, "empty nvpair"); - return 0; + if (rm->rm_col[c].rc_gdata != NULL) + grub_free (rm->rm_col[c].rc_gdata); } - *val = (char *) nvp; - *size_out = encode_size; - if (nelm_out) - *nelm_out = nelm; - - return 1; + + if (rm->rm_datacopy != NULL) + grub_free (rm->rm_datacopy); + grub_free (rm); } -/* - * Check the disk label information and retrieve needed vdev name-value pairs. 
- * - */ static grub_err_t -check_pool_label (struct grub_zfs_data *data, - struct grub_zfs_device_desc *diskdesc, - int *inserted) +zio_checksum_error(blkptr_t *bp, grub_zfs_endian_t endian, void *buf) { - grub_uint64_t pool_state, txg = 0; - char *nvlist,*features; -#if 0 - char *nv; -#endif - grub_uint64_t poolguid; - grub_uint64_t version; - int found; - grub_err_t err; - grub_zfs_endian_t endian; - vdev_phys_t *phys; - zio_cksum_t emptycksum; - - *inserted = 0; - - err = zfs_fetch_nvlist (diskdesc, &nvlist); - if (err) - return err; - - phys = (vdev_phys_t*) nvlist; - if (grub_zfs_to_cpu64 (phys->vp_zbt.zec_magic, - GRUB_ZFS_LITTLE_ENDIAN) - == ZEC_MAGIC) - endian = GRUB_ZFS_LITTLE_ENDIAN; - else if (grub_zfs_to_cpu64 (phys->vp_zbt.zec_magic, - GRUB_ZFS_BIG_ENDIAN) - == ZEC_MAGIC) - endian = GRUB_ZFS_BIG_ENDIAN; - else - { - grub_free (nvlist); - return grub_error (GRUB_ERR_BAD_FS, - "bad vdev_phys_t.vp_zbt.zec_magic number"); - } - /* Now check the integrity of the vdev_phys_t structure though checksum. */ - ZIO_SET_CHECKSUM(&emptycksum, diskdesc->vdev_phys_sector << 9, 0, 0, 0); - err = zio_checksum_verify (emptycksum, ZIO_CHECKSUM_LABEL, endian, - nvlist, VDEV_PHYS_SIZE); - if (err) - return err; + zio_cksum_t zc; + grub_uint32_t checksum; + grub_size_t size; + grub_uint64_t offset; - grub_dprintf ("zfs", "check 2 passed\n"); + grub_memset (&zc, 0, sizeof (zc)); - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_STATE, - &pool_state); - if (! found) + if (BP_IS_GANG(bp, endian)) { - grub_free (nvlist); - if (! 
grub_errno) - grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_STATE " not found"); - return grub_errno; + dva_t *dva = BP_IDENTITY(bp); + grub_uint64_t txg = BP_PHYSICAL_BIRTH(bp); + checksum = ZIO_CHECKSUM_GANG_HEADER; + size = SPA_GANGBLOCKSIZE; + offset = DVA_GET_OFFSET (dva, endian); + ZIO_SET_CHECKSUM (&zc, DVA_GET_VDEV(dva), offset, txg, 0); } - grub_dprintf ("zfs", "check 3 passed\n"); - - if (pool_state == POOL_STATE_DESTROYED) + else { - grub_free (nvlist); - return grub_error (GRUB_ERR_BAD_FS, "zpool is marked as destroyed"); + checksum = BP_GET_CHECKSUM(bp, endian); + size = BP_GET_PSIZE(bp, endian); + zc = bp->blk_cksum; } - grub_dprintf ("zfs", "check 4 passed\n"); + return zio_checksum_verify (zc, checksum, endian, buf, size); +} - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG, &txg); - if (!found) - { - grub_free (nvlist); - if (! grub_errno) - grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_TXG " not found"); - return grub_errno; - } - grub_dprintf ("zfs", "check 6 passed\n"); +static void +vdev_raidz_generate_parity_pq(raidz_map_t *rm) +{ + grub_uint64_t *p, *q, *src, pcnt, ccnt, mask, i; + int c; - /* not an active device */ - if (txg == 0) - { - grub_free (nvlist); - return grub_error (GRUB_ERR_BAD_FS, "zpool isn't active"); - } - grub_dprintf ("zfs", "check 7 passed\n"); + pcnt = grub_divmod64(rm->rm_col[VDEV_RAIDZ_P].rc_size, sizeof (src[0]), NULL); - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_VERSION, - &version); - if (! found) + for (c = rm->rm_firstdatacol; c < (int) rm->rm_cols; c++) { - grub_free (nvlist); - if (! 
grub_errno) - grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VERSION " not found"); - return grub_errno; - } - grub_dprintf ("zfs", "check 8 passed\n"); + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; - if (!SPA_VERSION_IS_SUPPORTED(version)) - { - grub_free (nvlist); - return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, - "too new version %llu > %llu", - (unsigned long long) version, - (unsigned long long) SPA_VERSION_BEFORE_FEATURES); - } - grub_dprintf ("zfs", "check 9 passed\n"); + ccnt = grub_divmod64(rm->rm_col[c].rc_size, sizeof (src[0]), NULL); - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID, - &(diskdesc->guid)); - if (! found) - { - grub_free (nvlist); - if (! grub_errno) - grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_GUID " not found"); - return grub_errno; - } + if (c == (int) rm->rm_firstdatacol) + { + for (i = 0; i < ccnt; i++, src++, p++, q++) + { + *p = *src; + *q = *src; + } + for (; i < pcnt; i++, src++, p++, q++) + { + *p = 0; + *q = 0; + } + } + else + { + for (i = 0; i < ccnt; i++, src++, p++, q++) + { + *p ^= *src; - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID, - &poolguid); - if (! found) - { - grub_free (nvlist); - if (! grub_errno) - grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_GUID " not found"); - return grub_errno; - } + VDEV_RAIDZ_64MUL_2(*q, mask); + *q ^= *src; + } - grub_dprintf ("zfs", "check 11 passed\n"); + /* + * Treat short columns as though they are full of 0s. + * Note that there's therefore nothing needed for P. 
+ */ + for (; i < pcnt; i++, q++) + VDEV_RAIDZ_64MUL_2(*q, mask); + } + } +} - if (data->mounted && data->guid != poolguid) - return grub_error (GRUB_ERR_BAD_FS, "another zpool"); - else - data->guid = poolguid; +static int +vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts) +{ + grub_uint64_t *dst, *src, xcount, ccount, count, i; + int x = tgts[0]; + int c; - { - char *nv; - nv = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE); + xcount = grub_divmod64(rm->rm_col[x].rc_size, sizeof (src[0]), NULL); - if (!nv) - { - grub_free (nvlist); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev tree"); - } - err = fill_vdev_info (data, nv, diskdesc, inserted); - if (err) - { - grub_free (nv); - grub_free (nvlist); - return err; - } - grub_free (nv); - } - grub_dprintf ("zfs", "check 10 passed\n"); - features = grub_zfs_nvlist_lookup_nvlist(nvlist, - ZPOOL_CONFIG_FEATURES_FOR_READ); - if (features) + src = rm->rm_col[VDEV_RAIDZ_P].rc_data; + dst = rm->rm_col[x].rc_data; + for (i = 0; i < xcount; i++, dst++, src++) + *dst = *src; + for (c = rm->rm_firstdatacol; c < (int) rm->rm_cols; c++) { - const char *nvp=NULL; - char name[MAX_SUPPORTED_FEATURE_STRLEN + 1]; - char *nameptr; - grub_size_t namelen; - while ((nvp = nvlist_next_nvpair(features, nvp)) != NULL) - { - nvpair_name (nvp, &nameptr, &namelen); - if(namelen > MAX_SUPPORTED_FEATURE_STRLEN) - namelen = MAX_SUPPORTED_FEATURE_STRLEN; - grub_memcpy (name, nameptr, namelen); - name[namelen] = '\0'; - grub_dprintf("zfs","str=%s\n",name); - if (check_feature(name,1, NULL) != 0) - { - grub_dprintf("zfs","feature missing in check_pool_label:%s\n",name); - err= grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET," check_pool_label missing feature '%s' for read",name); - return err; - } + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; + if (c == x) + continue; + + ccount = grub_divmod64(rm->rm_col[c].rc_size, sizeof (src[0]), NULL); + count = MIN(ccount, xcount); + + for (i = 0; i < count; i++, dst++, 
src++) + { + *dst ^= *src; } } - grub_dprintf ("zfs", "check 12 passed (feature flags)\n"); - grub_free (nvlist); - return GRUB_ERR_NONE; + return (1 << VDEV_RAIDZ_P); } -static grub_err_t -scan_disk (grub_device_t dev, struct grub_zfs_data *data, - int original, int *inserted) +static int +vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts) { - int label = 0; - uberblock_phys_t *ub_array, *ubbest = NULL; - vdev_boot_header_t *bh; - grub_err_t err; - int vdevnum; - struct grub_zfs_device_desc desc; + grub_uint64_t *dst, *src, xcount, ccount, count, mask, i; + grub_uint8_t *b; + int x = tgts[0]; + int c, j, exp; - ub_array = grub_malloc (VDEV_UBERBLOCK_RING); - if (!ub_array) - return grub_errno; + xcount = grub_divmod64(rm->rm_col[x].rc_size, sizeof (src[0]), NULL); - bh = grub_malloc (VDEV_BOOT_HEADER_SIZE); - if (!bh) + for (c = rm->rm_firstdatacol; c < (int) rm->rm_cols; c++) { - grub_free (ub_array); - return grub_errno; - } - - vdevnum = VDEV_LABELS; - - desc.dev = dev; - desc.original = original; + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; - /* Don't check back labels on CDROM. */ - if (grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN) - vdevnum = VDEV_LABELS / 2; + if (c == x) + ccount = 0; + else + ccount = grub_divmod64(rm->rm_col[c].rc_size, sizeof (src[0]), NULL); - for (label = 0; ubbest == NULL && label < vdevnum; label++) - { - desc.vdev_phys_sector - = label * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT) - + ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT) - + (label < VDEV_LABELS / 2 ? 0 : - ALIGN_DOWN (grub_disk_get_size (dev->disk), sizeof (vdev_label_t)) - - VDEV_LABELS * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT)); - - /* Read in the uberblock ring (128K). 
*/ - err = grub_disk_read (dev->disk, desc.vdev_phys_sector - + (VDEV_PHYS_SIZE >> SPA_MINBLOCKSHIFT), - 0, VDEV_UBERBLOCK_RING, (char *) ub_array); - if (err) + count = MIN(ccount, xcount); + if (c == (int) rm->rm_firstdatacol) { - grub_errno = GRUB_ERR_NONE; - continue; - } - grub_dprintf ("zfs", "label ok %d\n", label); + for (i = 0; i < count; i++, dst++, src++) + *dst = *src; - err = check_pool_label (data, &desc, inserted); - if (err || !*inserted) - { - grub_errno = GRUB_ERR_NONE; - continue; + for (; i < xcount; i++, dst++) + *dst = 0; } - - ubbest = find_bestub (ub_array, &desc); - if (!ubbest) + else { - grub_dprintf ("zfs", "No uberblock found\n"); - grub_errno = GRUB_ERR_NONE; - continue; + for (i = 0; i < count; i++, dst++, src++) + { + VDEV_RAIDZ_64MUL_2(*dst, mask); + *dst ^= *src; + } + + for (; i < xcount; i++, dst++) + VDEV_RAIDZ_64MUL_2(*dst, mask); } + } - grub_memmove (&(desc.current_uberblock), - &ubbest->ubp_uberblock, sizeof (uberblock_t)); - if (original) - grub_memmove (&(data->current_uberblock), - &ubbest->ubp_uberblock, sizeof (uberblock_t)); + src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + dst = rm->rm_col[x].rc_data; + exp = 255 - (rm->rm_cols - 1 - x); -#if 0 - if (find_best_root && - vdev_uberblock_compare (&ubbest->ubp_uberblock, - &(current_uberblock)) <= 0) - continue; -#endif - grub_free (ub_array); - grub_free (bh); - return GRUB_ERR_NONE; + for (i = 0; i < xcount; i++, dst++, src++) + { + *dst ^= *src; + for (j = 0, b = (grub_uint8_t *)dst; j < 8; j++, b++) + *b = vdev_raidz_exp2(*b, exp); } - - grub_free (ub_array); - grub_free (bh); - return grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid label"); + return (1 << VDEV_RAIDZ_Q); } -/* Helper for scan_devices. 
*/ static int -scan_devices_iter (const char *name, void *hook_data) +vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts) { - struct grub_zfs_data *data = hook_data; - grub_device_t dev; - grub_err_t err; - int inserted; + grub_uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; + void *pdata, *qdata; + grub_uint64_t xsize, ysize, i; + int x = tgts[0]; + int y = tgts[1]; - dev = grub_device_open (name); - if (!dev) - return 0; - if (!dev->disk) - { - grub_device_close (dev); - return 0; - } - err = scan_disk (dev, data, 0, &inserted); - if (err == GRUB_ERR_BAD_FS) - { - grub_device_close (dev); - grub_errno = GRUB_ERR_NONE; - return 0; - } - if (err) + /* + * Move the parity data aside -- we're going to compute parity as + * though columns x and y were full of zeros -- Pxy and Qxy. We want to + * reuse the parity generation mechanism without trashing the actual + * parity so we make those columns appear to be full of zeros by + * setting their lengths to zero. + */ + pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xsize = rm->rm_col[x].rc_size; + ysize = rm->rm_col[y].rc_size; + + rm->rm_col[VDEV_RAIDZ_P].rc_data = + grub_malloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); + rm->rm_col[VDEV_RAIDZ_Q].rc_data = + grub_malloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); + rm->rm_col[x].rc_size = 0; + rm->rm_col[y].rc_size = 0; + + vdev_raidz_generate_parity_pq(rm); + + rm->rm_col[x].rc_size = xsize; + rm->rm_col[y].rc_size = ysize; + + p = pdata; + q = qdata; + pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xd = rm->rm_col[x].rc_data; + yd = rm->rm_col[y].rc_data; + + /* + * We now have: + * Pxy = P + D_x + D_y + * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y + * + * We can then solve for D_x: + * D_x = A * (P + Pxy) + B * (Q + Qxy) + * where + * A = 2^(x - y) * (2^(x - y) + 1)^-1 + * B = 2^(ndevs - 1 - x) * (2^(x - y) + 1)^-1 + * + * With D_x in hand, we can easily solve for D_y: + * 
D_y = P + Pxy + D_x + */ + + a = vdev_raidz_pow2[255 + x - y]; + b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)]; + tmp = 255 - vdev_raidz_log2[a ^ 1]; + + aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; + bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; + + for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { - grub_device_close (dev); - grub_print_error (); - return 0; + *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ vdev_raidz_exp2(*q ^ *qxy, bexp); + + if (i < ysize) + *yd = *p ^ *pxy ^ *xd; } - if (!inserted) - grub_device_close (dev); - - return 0; + grub_free(rm->rm_col[VDEV_RAIDZ_P].rc_data); + grub_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data); + + /* + * Restore the saved parity data. + */ + rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; + rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; + + return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); } -static grub_err_t -scan_devices (struct grub_zfs_data *data) +static void +vdev_raidz_matrix_init(int n, int nmap, int *map, grub_uint8_t **rows) { - grub_device_iterate (scan_devices_iter, data); - return GRUB_ERR_NONE; + int i, j; + int pow; + + /* + * Fill in the missing rows of interest. + */ + for (i = 0; i < nmap; i++) + { + pow = map[i] * n; + if (pow > 255) + pow -= 255; + + for (j = 0; j < n; j++) + { + pow -= map[i]; + if (pow < 0) + pow += 255; + rows[i][j] = vdev_raidz_pow2[pow]; + } + } } -/* x**y. */ -static grub_uint8_t powx[255 * 2]; -/* Such an s that x**s = y */ -static int powx_inv[256]; -static const grub_uint8_t poly = 0x1d; - -/* perform the operation a ^= b * (x ** (known_idx * recovery_pow) ) */ -static inline void -xor_out (grub_uint8_t *a, const grub_uint8_t *b, grub_size_t s, - unsigned known_idx, unsigned recovery_pow) +static void +vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, + grub_uint8_t **rows, grub_uint8_t **invrows, const grub_uint8_t *used) { - unsigned add; + int i, j, ii, jj; + grub_uint8_t log; - /* Simple xor. 
*/ - if (known_idx == 0 || recovery_pow == 0) + /* + * First initialize the storage where we'll compute the inverse rows. + */ + for (i = 0; i < nmissing; i++) { - grub_crypto_xor (a, a, b, s); - return; + for (j = 0; j < n; j++) + invrows[i][j] = (i == j) ? 1 : 0; + } + + /* + * Subtract all trivial rows from the rows of consequence. + */ + for (i = 0; i < nmissing; i++) + { + for (j = nmissing; j < n; j++) + { + jj = used[j] - rm->rm_firstdatacol; + invrows[i][j] = rows[i][jj]; + rows[i][jj] = 0; + } + } + + /* + * For each of the rows of interest, we must normalize it and subtract + * a multiple of it from the other rows. + */ + for (i = 0; i < nmissing; i++) + { + /* + * Compute the inverse of the first element and multiply each + * element in the row by that value. + */ + log = 255 - vdev_raidz_log2[rows[i][missing[i]]]; + + for (j = 0; j < n; j++) + { + rows[i][j] = vdev_raidz_exp2(rows[i][j], log); + invrows[i][j] = vdev_raidz_exp2(invrows[i][j], log); + } + + for (ii = 0; ii < nmissing; ii++) + { + if (i == ii) + continue; + + log = vdev_raidz_log2[rows[ii][missing[i]]]; + + for (j = 0; j < n; j++) + { + rows[ii][j] ^= vdev_raidz_exp2(rows[i][j], log); + invrows[ii][j] ^= vdev_raidz_exp2(invrows[i][j], log); + } + } } - add = (known_idx * recovery_pow) % 255; - for (;s--; b++, a++) - if (*b) - *a ^= powx[powx_inv[*b] + add]; } -static inline grub_uint8_t -gf_mul (grub_uint8_t a, grub_uint8_t b) +static void +vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, + int *missing, grub_uint8_t **invrows, const grub_uint8_t *used) { - if (a == 0 || b == 0) - return 0; - return powx[powx_inv[a] + powx_inv[b]]; -} + int i, j, x, cc, c; + grub_uint8_t *src; + grub_uint64_t ccount; + grub_uint8_t *dst[VDEV_RAIDZ_MAXPARITY]; + grub_uint64_t dcount[VDEV_RAIDZ_MAXPARITY]; + grub_uint8_t log = 0; + grub_uint8_t val; + int ll; + grub_uint8_t *invlog[VDEV_RAIDZ_MAXPARITY]; + grub_uint8_t *p, *pp; + grub_size_t psize; -#define MAX_NBUFS 4 + psize = sizeof 
(invlog[0][0]) * n * nmissing; + p = grub_malloc(psize); -static grub_err_t -recovery (grub_uint8_t *bufs[4], grub_size_t s, const int nbufs, - const unsigned *powers, - const unsigned *idx) -{ - grub_dprintf ("zfs", "recovering %u buffers\n", nbufs); - /* Now we have */ - /* b_i = sum (r_j* (x ** (powers[i] * idx[j])))*/ - /* Let's invert the matrix in question. */ - switch (nbufs) + for (pp = p, i = 0; i < nmissing; i++) { - /* Easy: r_0 = bufs[0] / (x << (powers[i] * idx[j])). */ - case 1: - { - int add; - grub_uint8_t *a; - if (powers[0] == 0 || idx[0] == 0) - return GRUB_ERR_NONE; - add = 255 - ((powers[0] * idx[0]) % 255); - for (a = bufs[0]; s--; a++) - if (*a) - *a = powx[powx_inv[*a] + add]; - return GRUB_ERR_NONE; - } - /* Case 2x2: Let's use the determinant formula. */ - case 2: - { - grub_uint8_t det, det_inv; - grub_uint8_t matrixinv[2][2]; - unsigned i; - /* The determinant is: */ - det = (powx[(powers[0] * idx[0] + powers[1] * idx[1]) % 255] - ^ powx[(powers[0] * idx[1] + powers[1] * idx[0]) % 255]); - if (det == 0) - return grub_error (GRUB_ERR_BAD_FS, "singular recovery matrix"); - det_inv = powx[255 - powx_inv[det]]; - matrixinv[0][0] = gf_mul (powx[(powers[1] * idx[1]) % 255], det_inv); - matrixinv[1][1] = gf_mul (powx[(powers[0] * idx[0]) % 255], det_inv); - matrixinv[0][1] = gf_mul (powx[(powers[0] * idx[1]) % 255], det_inv); - matrixinv[1][0] = gf_mul (powx[(powers[1] * idx[0]) % 255], det_inv); - for (i = 0; i < s; i++) - { - grub_uint8_t b0, b1; - b0 = bufs[0][i]; - b1 = bufs[1][i]; - - bufs[0][i] = (gf_mul (b0, matrixinv[0][0]) - ^ gf_mul (b1, matrixinv[0][1])); - bufs[1][i] = (gf_mul (b0, matrixinv[1][0]) - ^ gf_mul (b1, matrixinv[1][1])); - } - return GRUB_ERR_NONE; - } - /* Otherwise use Gauss. 
*/ - case 3: - { - grub_uint8_t matrix1[MAX_NBUFS][MAX_NBUFS], matrix2[MAX_NBUFS][MAX_NBUFS]; - int i, j, k; + invlog[i] = pp; + pp += n; + } + + for (i = 0; i < nmissing; i++) + { + for (j = 0; j < n; j++) + invlog[i][j] = vdev_raidz_log2[invrows[i][j]]; + } + + for (i = 0; i < n; i++) + { + c = used[i]; - for (i = 0; i < nbufs; i++) - for (j = 0; j < nbufs; j++) - matrix1[i][j] = powx[(powers[i] * idx[j]) % 255]; - for (i = 0; i < nbufs; i++) - for (j = 0; j < nbufs; j++) - matrix2[i][j] = 0; - for (i = 0; i < nbufs; i++) - matrix2[i][i] = 1; + src = rm->rm_col[c].rc_data; + ccount = rm->rm_col[c].rc_size; + for (j = 0; j < nmissing; j++) + { + cc = missing[j] + rm->rm_firstdatacol; + + dst[j] = rm->rm_col[cc].rc_data; + dcount[j] = rm->rm_col[cc].rc_size; + } - for (i = 0; i < nbufs; i++) + for (x = 0; x < (int) ccount; x++, src++) { - grub_uint8_t mul; - for (j = i; j < nbufs; j++) - if (matrix1[i][j]) - break; - if (j == nbufs) - return grub_error (GRUB_ERR_BAD_FS, "singular recovery matrix"); - if (j != i) + if (*src != 0) + log = vdev_raidz_log2[*src]; + + for (cc = 0; cc < nmissing; cc++) { - int xchng; - xchng = j; - for (j = 0; j < nbufs; j++) - { - grub_uint8_t t; - t = matrix1[xchng][j]; - matrix1[xchng][j] = matrix1[i][j]; - matrix1[i][j] = t; - } - for (j = 0; j < nbufs; j++) + if (x >= (int) dcount[cc]) + continue; + + if (*src == 0) + val = 0; + else { - grub_uint8_t t; - t = matrix2[xchng][j]; - matrix2[xchng][j] = matrix2[i][j]; - matrix2[i][j] = t; + if ((ll = log + invlog[cc][i]) >= 255) + ll -= 255; + val = vdev_raidz_pow2[ll]; } - } - mul = powx[255 - powx_inv[matrix1[i][i]]]; - for (j = 0; j < nbufs; j++) - matrix1[i][j] = gf_mul (matrix1[i][j], mul); - for (j = 0; j < nbufs; j++) - matrix2[i][j] = gf_mul (matrix2[i][j], mul); - for (j = i + 1; j < nbufs; j++) - { - mul = matrix1[j][i]; - for (k = 0; k < nbufs; k++) - matrix1[j][k] ^= gf_mul (matrix1[i][k], mul); - for (k = 0; k < nbufs; k++) - matrix2[j][k] ^= gf_mul (matrix2[i][k], mul); - 
} - } - for (i = nbufs - 1; i >= 0; i--) - { - for (j = 0; j < i; j++) - { - grub_uint8_t mul; - mul = matrix1[j][i]; - for (k = 0; k < nbufs; k++) - matrix1[j][k] ^= gf_mul (matrix1[i][k], mul); - for (k = 0; k < nbufs; k++) - matrix2[j][k] ^= gf_mul (matrix2[i][k], mul); - } - } - for (i = 0; i < (int) s; i++) - { - grub_uint8_t b[MAX_NBUFS]; - for (j = 0; j < nbufs; j++) - b[j] = bufs[j][i]; - for (j = 0; j < nbufs; j++) - { - bufs[j][i] = 0; - for (k = 0; k < nbufs; k++) - bufs[j][i] ^= gf_mul (matrix2[j][k], b[k]); + if (i == 0) + dst[cc][x] = val; + else + dst[cc][x] ^= val; } } - return GRUB_ERR_NONE; - } - default: - return grub_error (GRUB_ERR_BUG, "too big matrix"); - } + } + + grub_free(p); } -static grub_err_t -read_device (grub_uint64_t offset, struct grub_zfs_device_desc *desc, - grub_size_t len, void *buf) +static int +vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) { - switch (desc->type) + int n, i, c, t, tt; + int nmissing_rows; + int missing_rows[VDEV_RAIDZ_MAXPARITY]; + int parity_map[VDEV_RAIDZ_MAXPARITY]; + + grub_uint8_t *p, *pp; + grub_size_t psize; + + grub_uint8_t *rows[VDEV_RAIDZ_MAXPARITY]; + grub_uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; + grub_uint8_t *used; + + int code = 0; + + + n = rm->rm_cols - rm->rm_firstdatacol; + + /* + * Figure out which data columns are missing. + */ + nmissing_rows = 0; + for (t = 0; t < ntgts; t++) { - case DEVICE_LEAF: - { - grub_uint64_t sector; - sector = DVA_OFFSET_TO_PHYS_SECTOR (offset); - if (!desc->dev) - { - return grub_error (GRUB_ERR_BAD_FS, + if (tgts[t] >= (int) rm->rm_firstdatacol) + missing_rows[nmissing_rows++] = tgts[t] - rm->rm_firstdatacol; + } + + /* + * Figure out which parity columns to use to help generate the missing + * data columns. + */ + for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) + { + /* + * Skip any targeted parity columns. 
+ */ + if (c == tgts[tt]) + { + tt++; + continue; + } + + code |= 1 << c; + + parity_map[i] = c; + i++; + } + + psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) * + nmissing_rows * n + sizeof (used[0]) * n; + p = grub_malloc(psize); + + for (pp = p, i = 0; i < nmissing_rows; i++) + { + rows[i] = pp; + pp += n; + invrows[i] = pp; + pp += n; + } + used = pp; + + for (i = 0; i < nmissing_rows; i++) + used[i] = parity_map[i]; + + for (tt = 0, c = rm->rm_firstdatacol; c < (int) rm->rm_cols; c++) + { + if (tt < (int) nmissing_rows && + c == missing_rows[tt] + (int) rm->rm_firstdatacol) + { + tt++; + continue; + } + + used[i] = c; + i++; + } + + /* + * Initialize the interesting rows of the matrix. + */ + vdev_raidz_matrix_init(n, nmissing_rows, parity_map, rows); + + /* + * Invert the matrix. + */ + vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows, + invrows, used); + + /* + * Reconstruct the missing data using the generated matrix. + */ + vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows, + invrows, used); + + grub_free(p); + + return (code); +} + +static int +vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) +{ + int tgts[VDEV_RAIDZ_MAXPARITY], *dt; + int ntgts; + int i, c; + int code; + int nbadparity, nbaddata; + int parity_valid[VDEV_RAIDZ_MAXPARITY]; + + nbadparity = rm->rm_firstdatacol; + nbaddata = rm->rm_cols - nbadparity; + ntgts = 0; + for (i = 0, c = 0; c < (int) rm->rm_cols; c++) + { + if (c < (int) rm->rm_firstdatacol) + parity_valid[c] = B_FALSE; + + if (i < nt && c == t[i]) + { + tgts[ntgts++] = c; + i++; + } + else if (rm->rm_col[c].rc_error != 0) + { + tgts[ntgts++] = c; + } + else if (c >= (int) rm->rm_firstdatacol) + { + nbaddata--; + } + else + { + parity_valid[c] = B_TRUE; + nbadparity--; + } + } + + dt = &tgts[nbadparity]; + + switch (nbaddata) + { + case 1: + if (parity_valid[VDEV_RAIDZ_P]) + return (vdev_raidz_reconstruct_p(rm, dt)); + + if (parity_valid[VDEV_RAIDZ_Q]) + return 
(vdev_raidz_reconstruct_q(rm, dt)); + + break; + case 2: + if (parity_valid[VDEV_RAIDZ_P] && parity_valid[VDEV_RAIDZ_Q]) + return (vdev_raidz_reconstruct_pq(rm, dt)); + + break; + } + + code = vdev_raidz_reconstruct_general(rm, tgts, ntgts); + return (code); +} + +static mirror_map_t * +vdev_mirror_map_alloc(struct grub_zfs_device_desc *vd, grub_uint64_t offset, + grub_uint64_t children) +{ + mirror_map_t *mm = NULL; + mirror_child_t *mc; + int c; + + c = (int) children; + mm = grub_zalloc(offsetof(mirror_map_t, mm_child[c])); + mm->mm_children = c; + mm->mm_replacing = B_FALSE; + mm->mm_preferred = ((int)offset >> 21) % c; + mm->mm_root = B_TRUE; + mm->mm_offset = offset; + + for (c = 0; c < mm->mm_children; c++) + { + mc = &mm->mm_child[c]; + mc->mc_vd = &vd->children[c]; + mc->mc_offset = offset; + } + + return (mm); +} + +static int +vdev_mirror_child_select(mirror_map_t *mm) +{ + mirror_child_t *mc = NULL; + int c, i; + + for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) + { + if (c >= mm->mm_children) + c = 0; + mc = &mm->mm_child[c]; + if (mc->mc_tried || mc->mc_skipped) + continue; + if (mc->mc_vd->dev_state == DEVICE_ERROR) + { + mc->mc_error = GRUB_ERR_IO; + mc->mc_tried = 1; /* don't even try */ + mc->mc_skipped = 1; + continue; + } + return (c); + } + for (c = 0; c < mm->mm_children; c++) + if (!mm->mm_child[c].mc_tried) + return (c); + + return -1; +} + +static grub_err_t +read_device (grub_uint64_t offset, struct grub_zfs_device_desc *desc, + grub_size_t len, void *buf, void **priv) +{ + grub_err_t err = GRUB_ERR_NONE; + + if (priv != NULL) + *priv = NULL; + + switch (desc->type) + { + case DEVICE_LEAF: + { + grub_uint64_t sector; + sector = DVA_OFFSET_TO_PHYS_SECTOR (offset); + grub_dprintf("zfs", "read_device: offset %llx sector %llx\n", + (unsigned long long) offset, + (unsigned long long) sector); + if (!desc->dev) + { + return grub_error (GRUB_ERR_BAD_FS, N_("couldn't find a necessary member device " "of multi-device filesystem")); 
} + if (desc->dev_state == DEVICE_ERROR) + { + return grub_error (GRUB_ERR_BAD_DEVICE, "dev state is NOT OK.\n"); + } /* read in a data block */ return grub_disk_read (desc->dev->disk, sector, 0, len, buf); } case DEVICE_MIRROR: { - grub_err_t err = GRUB_ERR_NONE; - unsigned i; + mirror_map_t *mm = NULL; + if (desc->n_children <= 0) return grub_error (GRUB_ERR_BAD_FS, "non-positive number of mirror children"); - for (i = 0; i < desc->n_children; i++) + + mm = vdev_mirror_map_alloc(desc, offset, desc->n_children); + if (mm == NULL) + err = grub_errno; + else { - err = read_device (offset, &desc->children[i], - len, buf); - if (!err) - break; - grub_errno = GRUB_ERR_NONE; + mm->mm_buf = buf; + mm->mm_size = len; } - grub_errno = err; + *priv = mm; return err; } case DEVICE_RAIDZ: { - unsigned c = 0; - grub_uint64_t high; - grub_uint64_t devn; - grub_uint64_t m; - grub_uint32_t s, orig_s; - void *orig_buf = buf; - grub_size_t orig_len = len; - grub_uint8_t *recovery_buf[4]; - grub_size_t recovery_len[4]; - unsigned recovery_idx[4]; - unsigned failed_devices = 0; - int idx, orig_idx; + raidz_map_t *rm; + raidz_col_t *rc; + grub_int64_t c; + struct grub_zfs_device_desc *cvd; if (desc->nparity < 1 || desc->nparity > 3) return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, "raidz%d is not supported", desc->nparity); - orig_s = (((len + (1 << desc->ashift) - 1) >> desc->ashift) - + (desc->n_children - desc->nparity) - 1); - s = orig_s; - - high = grub_divmod64 ((offset >> desc->ashift), - desc->n_children, &m); - if (desc->nparity == 2) - c = 2; - if (desc->nparity == 3) - c = 3; - if (((len + (1 << desc->ashift) - 1) >> desc->ashift) - >= (desc->n_children - desc->nparity)) - idx = (desc->n_children - desc->nparity - 1); - else - idx = ((len + (1 << desc->ashift) - 1) >> desc->ashift) - 1; - orig_idx = idx; - while (len > 0) + return grub_error(GRUB_ERR_BAD_FS, + "too little devices for given parity"); + rm = vdev_raidz_map_alloc(buf, len, offset, desc->ashift, + 
desc->n_children, desc->nparity); + if (rm == NULL) + return grub_errno; + + grub_dprintf("zfs", "read_device: cols = %" PRIuGRUB_UINT64_T + ", firstdatacol = %" PRIuGRUB_UINT64_T "\n", + rm->rm_cols, rm->rm_firstdatacol); + + /* + * Iterate over the columns in reverse order so that we hit the parity + * last -- any errors along the way will force us to read the parity. + */ + for (c = rm->rm_cols - 1; c >= 0; c--) { - grub_size_t csize; - grub_uint32_t bsize; - grub_err_t err; - bsize = s / (desc->n_children - desc->nparity); - - if (desc->nparity == 1 - && ((offset >> (desc->ashift + 20 - desc->max_children_ashift)) - & 1) == c) - c++; - - high = grub_divmod64 ((offset >> desc->ashift) + c, - desc->n_children, &devn); - csize = bsize << desc->ashift; - if (csize > len) - csize = len; - - grub_dprintf ("zfs", "RAIDZ mapping 0x%" PRIxGRUB_UINT64_T - "+%u (%" PRIxGRUB_SIZE ", %" PRIxGRUB_UINT32_T - ") -> (0x%" PRIxGRUB_UINT64_T ", 0x%" - PRIxGRUB_UINT64_T ")\n", - offset >> desc->ashift, c, len, bsize, high, - devn); - err = read_device ((high << desc->ashift) - | (offset & ((1 << desc->ashift) - 1)), - &desc->children[devn], - csize, buf); - if (err && failed_devices < desc->nparity) + rc = &rm->rm_col[c]; + cvd = &desc->children[rc->rc_devidx]; + if(cvd->dev_state == DEVICE_ERROR) { - recovery_buf[failed_devices] = buf; - recovery_len[failed_devices] = csize; - recovery_idx[failed_devices] = idx; - failed_devices++; - grub_errno = err = 0; - } - if (err) - return err; - - c++; - idx--; - s--; - buf = (char *) buf + csize; - len -= csize; - } - if (failed_devices) - { - unsigned redundancy_pow[4]; - unsigned cur_redundancy_pow = 0; - unsigned n_redundancy = 0; - unsigned i, j; - grub_err_t err; - - /* Compute mul. x**s has a period of 255. 
*/ - if (powx[0] == 0) + if ((grub_uint64_t)c >= rm->rm_firstdatacol) + rm->rm_missingdata++; + else + rm->rm_missingparity++; + rc->rc_error = GRUB_ERR_IO; + rc->rc_tried = 1; /* don't even try */ + rc->rc_skipped = 1; + continue; + } + grub_dprintf("zfs", "%" PRIuGRUB_UINT64_T ":%" PRIxGRUB_UINT64_T + ":%" PRIxGRUB_UINT64_T "\n", rc->rc_devidx, + rc->rc_offset, rc->rc_size); + if ((grub_uint64_t)c >= rm->rm_firstdatacol || + rm->rm_missingdata > 0) { - grub_uint8_t cur = 1; - for (i = 0; i < 255; i++) + err = read_device (rc->rc_offset, cvd, rc->rc_size, + rc->rc_data, NULL); + rc->rc_error = err; + rc->rc_tried = 1; + rc->rc_skipped = 0; + if (err) { - powx[i] = cur; - powx[i + 255] = cur; - powx_inv[cur] = i; - if (cur & 0x80) - cur = (cur << 1) ^ poly; + /* missing data will trigger parity read */ + if ((grub_uint64_t)c >= rm->rm_firstdatacol) + rm->rm_missingdata++; else - cur <<= 1; - } - } - - /* Read redundancy data. */ - for (n_redundancy = 0, cur_redundancy_pow = 0; - n_redundancy < failed_devices; - cur_redundancy_pow++) - { - high = grub_divmod64 ((offset >> desc->ashift) - + cur_redundancy_pow - + ((desc->nparity == 1) - && ((offset >> (desc->ashift + 20 - - desc->max_children_ashift)) - & 1)), - desc->n_children, &devn); - err = read_device ((high << desc->ashift) - | (offset & ((1 << desc->ashift) - 1)), - &desc->children[devn], - recovery_len[n_redundancy], - recovery_buf[n_redundancy]); - /* Ignore error if we may still have enough devices. */ - if (err && n_redundancy + desc->nparity - cur_redundancy_pow - 1 - >= failed_devices) - { - grub_errno = GRUB_ERR_NONE; - continue; + rm->rm_missingparity++; } - if (err) - return err; - redundancy_pow[n_redundancy] = cur_redundancy_pow; - n_redundancy++; - } - /* Now xor-our the parts we already know. 
*/ - buf = orig_buf; - len = orig_len; - s = orig_s; - idx = orig_idx; - - while (len > 0) - { - grub_size_t csize; - csize = ((s / (desc->n_children - desc->nparity)) - << desc->ashift); - if (csize > len) - csize = len; - - for (j = 0; j < failed_devices; j++) - if (buf == recovery_buf[j]) - break; - - if (j == failed_devices) - for (j = 0; j < failed_devices; j++) - xor_out (recovery_buf[j], buf, - csize < recovery_len[j] ? csize : recovery_len[j], - idx, redundancy_pow[j]); - - s--; - buf = (char *) buf + csize; - len -= csize; - idx--; - } - for (i = 0; i < failed_devices - && recovery_len[i] == recovery_len[0]; - i++); - /* Since the chunks have variable length handle the last block - separately. */ - if (i != failed_devices) - { - grub_uint8_t *tmp_recovery_buf[4]; - for (j = 0; j < i; j++) - tmp_recovery_buf[j] = recovery_buf[j] + recovery_len[failed_devices - 1]; - err = recovery (tmp_recovery_buf, recovery_len[0] - recovery_len[failed_devices - 1], i, redundancy_pow, - recovery_idx); - if (err) - return err; } - err = recovery (recovery_buf, recovery_len[failed_devices - 1], - failed_devices, redundancy_pow, recovery_idx); - if (err) - return err; } + *priv = rm; + return GRUB_ERR_NONE; } } return grub_error (GRUB_ERR_BAD_FS, "unsupported device type"); } -static grub_err_t -read_dva (const dva_t *dva, - grub_zfs_endian_t endian, struct grub_zfs_data *data, - void *buf, grub_size_t len) +static int +vdev_raidz_combrec(blkptr_t *bp, grub_zfs_endian_t endian, raidz_map_t *rm, + int total_errors, int data_errors) { - grub_uint64_t offset; - unsigned i; - grub_err_t err = 0; - int try = 0; - offset = dva_get_offset (dva, endian); + raidz_col_t *rc; + void *orig[VDEV_RAIDZ_MAXPARITY]; + int tstore[VDEV_RAIDZ_MAXPARITY + 2]; + int *tgts = &tstore[1]; + int current, next, i, c, n; + int code, ret = 0; - for (try = 0; try < 2; try++) + /* + * This simplifies one edge condition. 
+ */ + tgts[-1] = -1; + + for (n = 1; n <= (int) rm->rm_firstdatacol - total_errors; n++) { - for (i = 0; i < data->n_devices_attached; i++) - if (data->devices_attached[i].id == DVA_GET_VDEV (dva)) - { - err = read_device (offset, &data->devices_attached[i], len, buf); - if (!err) - return GRUB_ERR_NONE; - break; - } - if (try == 1) - break; - err = scan_devices (data); - if (err) - return err; - } - if (!err) - return grub_error (GRUB_ERR_BAD_FS, "unknown device %d", - (int) DVA_GET_VDEV (dva)); - return err; -} + /* + * Initialize the targets array by finding the first n columns + * that contain no error. + * + * If there were no data errors, we need to ensure that we're + * always explicitly attempting to reconstruct at least one + * data column. To do this, we simply push the highest target + * up into the data columns. + */ + for (c = 0, i = 0; i < n; i++) + { + if (i == n - 1 && data_errors == 0 && c < (int) rm->rm_firstdatacol) + { + c = rm->rm_firstdatacol; + } -/* - * Read a block of data based on the gang block address dva, - * and put its data in buf. - * - */ -static grub_err_t -zio_read_gang (blkptr_t * bp, grub_zfs_endian_t endian, dva_t * dva, void *buf, - struct grub_zfs_data *data) -{ - zio_gbh_phys_t *zio_gb; - unsigned i; - grub_err_t err; - zio_cksum_t zc; + while (rm->rm_col[c].rc_error != 0) + c++; - grub_memset (&zc, 0, sizeof (zc)); + tgts[i] = c++; + } - zio_gb = grub_malloc (SPA_GANGBLOCKSIZE); - if (!zio_gb) - return grub_errno; - grub_dprintf ("zfs", endian == GRUB_ZFS_LITTLE_ENDIAN ? "little-endian gang\n" - :"big-endian gang\n"); + /* + * Setting tgts[n] simplifies the other edge condition. 
+ */ + tgts[n] = rm->rm_cols; - err = read_dva (dva, endian, data, zio_gb, SPA_GANGBLOCKSIZE); - if (err) - { - grub_free (zio_gb); - return err; - } + if ((orig[n - 1] = grub_malloc(rm->rm_col[0].rc_size)) == NULL) goto done; /* out of memory: ret is still 0, nothing was reconstructed */ - /* XXX */ - /* self checksuming the gang block header */ - ZIO_SET_CHECKSUM (&zc, DVA_GET_VDEV (dva), - dva_get_offset (dva, endian), bp->blk_birth, 0); - err = zio_checksum_verify (zc, ZIO_CHECKSUM_GANG_HEADER, endian, - (char *) zio_gb, SPA_GANGBLOCKSIZE); - if (err) - { - grub_free (zio_gb); - return err; - } + current = 0; + next = tgts[current]; - endian = (grub_zfs_to_cpu64 (bp->blk_prop, endian) >> 63) & 1; + while (current != n) + { + tgts[current] = next; + current = 0; - for (i = 0; i < SPA_GBH_NBLKPTRS; i++) - { - if (zio_gb->zg_blkptr[i].blk_birth == 0) - continue; + /* + * Save off the original data that we're going to + * attempt to reconstruct. + */ + for (i = 0; i < n; i++) + { + c = tgts[i]; + rc = &rm->rm_col[c]; + grub_memcpy(orig[i], rc->rc_data, rc->rc_size); + } - err = zio_read_data (&zio_gb->zg_blkptr[i], endian, buf, data); - if (err) - { - grub_free (zio_gb); - return err; - } - buf = (char *) buf + get_psize (&zio_gb->zg_blkptr[i], endian); - } - grub_free (zio_gb); - return GRUB_ERR_NONE; -} + /* + * Attempt a reconstruction and exit the outer loop on + * success. + */ + code = vdev_raidz_reconstruct(rm, tgts, n); + if (zio_checksum_error(bp, endian, + rm->rm_col[rm->rm_firstdatacol].rc_data) == 0) + { + for (i = 0; i < n; i++) + { + c = tgts[i]; + rc = &rm->rm_col[c]; + rc->rc_error = GRUB_ERR_IO; + } -/* - * Read in a block of raw data to buf. - */ -static grub_err_t -zio_read_data (blkptr_t * bp, grub_zfs_endian_t endian, void *buf, - struct grub_zfs_data *data) -{ - int i, psize; - grub_err_t err = GRUB_ERR_NONE; + ret = code; + goto done; + } - psize = get_psize (bp, endian); + /* + * Restore the original data.
+ */ + for (i = 0; i < n; i++) + { + c = tgts[i]; + rc = &rm->rm_col[c]; + grub_memcpy(rc->rc_data, orig[i], rc->rc_size); + } - /* pick a good dva from the block pointer */ - for (i = 0; i < SPA_DVAS_PER_BP; i++) - { - if (bp->blk_dva[i].dva_word[0] == 0 && bp->blk_dva[i].dva_word[1] == 0) - continue; + do { + /* + * Find the next valid column after the current + * position.. + */ + for (next = tgts[current] + 1; + next < (int) rm->rm_cols && rm->rm_col[next].rc_error != 0; + next++) + continue; - if ((grub_zfs_to_cpu64 (bp->blk_dva[i].dva_word[1], endian)>>63) & 1) - err = zio_read_gang (bp, endian, &bp->blk_dva[i], buf, data); - else - err = read_dva (&bp->blk_dva[i], endian, data, buf, psize); - if (!err) - return GRUB_ERR_NONE; - grub_errno = GRUB_ERR_NONE; + /* + * If that spot is available, we're done here. + */ + if (next != tgts[current + 1]) + break; + + /* + * Otherwise, find the next valid column after + * the previous position. + */ + for (c = tgts[current - 1] + 1; rm->rm_col[c].rc_error != 0; c++) + continue; + + tgts[current] = c; + current++; + + } while (current != n); + } } + n--; - if (!err) - err = grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid DVA"); - grub_errno = err; +done: + for (i = 0; i < n; i++) + grub_free(orig[i]); - return err; + return (ret); } -/* - * Read in a block of data, verify its checksum, decompress if needed, - * and put the uncompressed data in buf. 
- */ static grub_err_t -zio_read (blkptr_t *bp, grub_zfs_endian_t endian, void **buf, - grub_size_t *size, struct grub_zfs_data *data) +vdev_raidz_io_done(blkptr_t *bp, struct grub_zfs_device_desc *desc, + grub_zfs_endian_t endian, raidz_map_t *rm) { - grub_size_t lsize, psize; - unsigned int comp, encrypted; - char *compbuf = NULL; - grub_err_t err; - zio_cksum_t zc = bp->blk_cksum; - grub_uint32_t checksum; - - *buf = NULL; + raidz_col_t *rc; + int unexpected_errors = 0; + int parity_errors = 0; + int parity_untried = 0; + int data_errors = 0; + int total_errors = 0; + int n, c; + int tgts[VDEV_RAIDZ_MAXPARITY]; + int code; + grub_err_t err = GRUB_ERR_NONE; - checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 40) & 0xff; - comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0xff; - encrypted = ((grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 60) & 3); - lsize = (BP_IS_HOLE(bp) ? 0 : - (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) & 0xffff) + 1) - << SPA_MINBLOCKSHIFT)); - psize = get_psize (bp, endian); + for (c = 0; c < (int)rm->rm_cols; c++) + { + rc = &rm->rm_col[c]; + if (rc->rc_error) + { + if (c < (int)rm->rm_firstdatacol) + parity_errors++; + else + data_errors++; - if (size) - *size = lsize; + if (!rc->rc_skipped) + unexpected_errors++; - if (comp >= ZIO_COMPRESS_FUNCTIONS) - return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, - "compression algorithm %u not supported\n", (unsigned int) comp); + total_errors++; + } + else if (c < (int)rm->rm_firstdatacol && !rc->rc_tried) + { + parity_untried++; + } + } + /* + * There are three potential phases for a read: + * 1. produce valid data from the columns read + * 2. read all disks and try again + * 3. perform combinatorial reconstruction + * + * Each phase is progressively both more expensive and less likely to + * occur. If we encounter more errors than we can repair or all phases + * fail, we have no choice but to return an error. 
+ */ - if (comp != ZIO_COMPRESS_OFF && decomp_table[comp].decomp_func == NULL) + /* + * If the number of errors we saw was correctable -- less than or equal + * to the number of parity disks read -- attempt to produce data that + * has a valid checksum. Naturally, this case applies in the absence of + * any errors. + */ + if (total_errors <= (int)rm->rm_firstdatacol - parity_untried) + { + if (data_errors == 0) + { + rc = &rm->rm_col[rm->rm_firstdatacol]; + if ((err = zio_checksum_error(bp, endian, rc->rc_data)) == 0) + { + goto done; + } + else + rm->rm_ecksuminjected = 1; + } + else + { + n = 0; + for (c = rm->rm_firstdatacol; c < (int)rm->rm_cols; c++) + { + rc = &rm->rm_col[c]; + if (rc->rc_error != 0) + tgts[n++] = c; + } + code = vdev_raidz_reconstruct(rm, tgts, n); + rc = &rm->rm_col[rm->rm_firstdatacol]; + if ((err = zio_checksum_error(bp, endian, rc->rc_data)) == 0) + { + goto done; + } + else + rm->rm_ecksuminjected = 1; + } + } + /* + * if all else fails. make sure we have read all columns and + * check again. + */ + unexpected_errors = 1; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; + + for (c = 0; c < (int) rm->rm_cols; c++) + { + struct grub_zfs_device_desc *cvd; + if (rm->rm_col[c].rc_tried) + continue; + + do { + rc = &rm->rm_col[c]; + if (rc->rc_tried) + continue; + + cvd = &desc->children[rc->rc_devidx]; + err = read_device (rc->rc_offset, cvd, rc->rc_size, + rc->rc_data, NULL); + rc->rc_error = err; + rc->rc_tried = 1; + rc->rc_skipped = 0; + } while (++c < (int) rm->rm_cols); + + /* and repeat the check */ + return vdev_raidz_io_done(bp, desc, endian, rm); + } + + /* + * all columns are read, it could still be the silent data corruption. 
+ */ + err = GRUB_ERR_IO; + + if (total_errors < (int) rm->rm_firstdatacol) + { + code = vdev_raidz_combrec(bp, endian, rm, total_errors, data_errors); + if (code != 0) + err = GRUB_ERR_NONE; + } + +done: + grub_dprintf("zfs", "vdev_raidz_io_done (%d):\n" + "\tdata errors: %d\n" + "\tparity errors: %d\n" + "\tparity untried: %d\n" + "\tunexpected errors: %d\n" + "\ttotal errors: %d\n", err, data_errors, parity_errors, + parity_untried, unexpected_errors, total_errors); + + vdev_raidz_map_free(rm); + return err; +} + +static grub_err_t +vdev_mirror_io_done(blkptr_t *bp, grub_zfs_endian_t endian, mirror_map_t *mm) +{ + mirror_child_t *mc = NULL; + int c; + grub_err_t err = GRUB_ERR_NONE; + + while ((c = vdev_mirror_child_select(mm)) >= 0) + { + grub_dprintf("zfs", "reading mirror child: %d\n", c); + mc = &mm->mm_child[c]; + err = read_device (mm->mm_offset, mc->mc_vd, mm->mm_size, + mm->mm_buf, NULL); + mc->mc_tried = 1; + mc->mc_skipped = 0; + if (err == GRUB_ERR_NONE) + { + err = zio_checksum_error(bp, endian, mm->mm_buf); + if (err == GRUB_ERR_NONE) + break; + } + mc->mc_error = err; + } + + grub_free(mm); + return err; +} + +static grub_err_t +read_dva (blkptr_t *bp, const dva_t *dva, grub_zfs_endian_t endian, + struct grub_zfs_data *data, void *buf, grub_size_t len) +{ + grub_uint64_t offset; + grub_size_t align; + grub_size_t asize; + void *abuf = NULL, *orig_buf = NULL; + unsigned i; + void *priv; + grub_err_t err = GRUB_ERR_NONE; + offset = DVA_GET_OFFSET (dva, endian); + struct grub_zfs_device_desc *desc; + + for (i = 0; i < data->n_devices_attached; i++) + if (data->devices_attached[i].id == DVA_GET_VDEV (dva)) + { + desc = &data->devices_attached[i]; + align = 1 << desc->ashift; + if (P2PHASE(len, align) != 0) { + grub_dprintf("zfs", "read_dva: unaligned read: %" + PRIdGRUB_SSIZE "(%" PRIdGRUB_SSIZE ")\n", + len, P2ROUNDUP(len, align)); + asize = P2ROUNDUP(len, align); + abuf = grub_malloc(asize); + if (abuf == NULL) return grub_errno; /* no bounce buffer: never hand NULL to read_device */ + orig_buf = buf; + } else { + asize = len; + abuf = buf;
+ } + + grub_dprintf("zfs", "read_dva vdev: %d\n", i); + err = read_device (offset, desc, asize, abuf, &priv); + if (err != GRUB_ERR_NONE) + break; + + if (data->devices_attached[i].type == DEVICE_MIRROR) + { + mirror_map_t *mm = priv; + err = vdev_mirror_io_done(bp, endian, mm); + } + if (data->devices_attached[i].type == DEVICE_RAIDZ) + { + raidz_map_t *rm = priv; + err = vdev_raidz_io_done(bp, desc, endian, rm); + } + if (data->devices_attached[i].type == DEVICE_LEAF) + { + err = zio_checksum_error(bp, endian, abuf); + } + + if (err == GRUB_ERR_NONE) + { + if (orig_buf != NULL) + { + grub_memcpy(buf, abuf, len); + grub_free(abuf); + } + return err; + } + break; + } + + if (orig_buf != NULL) + grub_free(abuf); + + if (!err) + return grub_error (GRUB_ERR_BAD_FS, "unknown device %d", + (int) DVA_GET_VDEV (dva)); + return err; +} + +/* + * Read a block of data based on the gang block address dva, + * and put its data in buf. + */ +static grub_err_t +zio_read_gang_data (blkptr_t * bp, grub_zfs_endian_t endian, void *buf, + struct grub_zfs_data *data) +{ + int i; + + /* pick a good dva from the block pointer */ + for (i = 0; i < BP_GET_NDVAS(bp); i++) + { + if (zio_read_common(bp, &bp->blk_dva[i], endian, buf, data) == GRUB_ERR_NONE) + return GRUB_ERR_NONE; + } + return grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid DVA"); +} + +/* + * Read gang block header, verify its checksum, loop through all gang blocks + * to collect its data based on the gang block address dva and put it in buf. + * + */ +static grub_err_t +zio_read_gang (blkptr_t * bp, grub_zfs_endian_t endian, dva_t * dva, void *buf, + struct grub_zfs_data *data) +{ + zio_gbh_phys_t *zio_gb; + unsigned i; + grub_err_t err; + + zio_gb = grub_malloc (SPA_GANGBLOCKSIZE); + if (!zio_gb) + return grub_errno; + grub_dprintf ("zfs", endian == GRUB_ZFS_LITTLE_ENDIAN ? 
"little-endian gang\n" + :"big-endian gang\n"); + + err = read_dva (bp, dva, endian, data, zio_gb, SPA_GANGBLOCKSIZE); + if (err) + { + grub_free (zio_gb); + return err; + } + + endian = (grub_zfs_to_cpu64 (bp->blk_prop, endian) >> 63) & 1; + + for (i = 0; i < SPA_GBH_NBLKPTRS; i++) + { + if (zio_gb->zg_blkptr[i].blk_birth == 0) + continue; + + err = zio_read_gang_data (&zio_gb->zg_blkptr[i], endian, buf, data); + if (err) + { + grub_free (zio_gb); + return err; + } + buf = (char *) buf + BP_GET_PSIZE (&zio_gb->zg_blkptr[i], endian); + } + grub_free (zio_gb); + return GRUB_ERR_NONE; +} + +/* + * Read in a block of raw data to buf. + */ +static grub_err_t +zio_read_common (blkptr_t * bp, dva_t *dva, grub_zfs_endian_t endian, + void *buf, struct grub_zfs_data *data) +{ + int psize; + grub_err_t err = GRUB_ERR_NONE; + + psize = BP_GET_PSIZE (bp, endian); + + if (dva->dva_word[0] == 0 && dva->dva_word[1] == 0) + return grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid DVA"); + + if (BP_IS_GANG(bp, endian)) + err = zio_read_gang (bp, endian, dva, buf, data); + else + err = read_dva (bp, dva, endian, data, buf, psize); + + return err; + +} + +/* + * Loop through DVAs to read in a block of raw data to buf and verify + * the checksum. + */ +static grub_err_t +zio_read_data (blkptr_t * bp, grub_zfs_endian_t endian, void *buf, + struct grub_zfs_data *data) +{ + int i; + grub_err_t err = GRUB_ERR_NONE; + + /* pick a good dva from the block pointer */ + for (i = 0; i < BP_GET_NDVAS(bp); i++) + { + if (zio_read_common(bp, &bp->blk_dva[i], endian, buf, data) != GRUB_ERR_NONE) + { + grub_errno = GRUB_ERR_NONE; + continue; + } + /* if no errors, return from here */ + return GRUB_ERR_NONE; + } + + err = grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid DVA"); + grub_errno = err; + + return err; +} + +/* + * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be + * more than BPE_PAYLOAD_SIZE bytes). 
+ */ +static grub_err_t +decode_embedded_bp_compressed(const blkptr_t *bp, grub_zfs_endian_t endian, + void *buf) +{ + grub_size_t psize, i; + grub_uint8_t *buf8 = buf; + grub_uint64_t w = 0; + const grub_uint64_t *bp64 = (const grub_uint64_t *)bp; + + psize = BPE_GET_PSIZE(bp, endian); + + /* + * Decode the words of the block pointer into the byte array. + * Low bits of first word are the first byte (little endian). + */ + for (i = 0; i < psize; i++) { + if (i % sizeof (w) == 0) { + /* beginning of a word */ + w = grub_zfs_to_cpu64(*bp64, endian); + bp64++; + if (!BPE_IS_PAYLOADWORD(bp, bp64)) + bp64++; + } + buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); + } + return GRUB_ERR_NONE; +} + +/* + * Read in a block of data, verify its checksum, decompress if needed, + * and put the uncompressed data in buf. + */ +static grub_err_t +zio_read (blkptr_t *bp, grub_zfs_endian_t endian, void **buf, + grub_size_t *size, struct grub_zfs_data *data) +{ + grub_size_t lsize, psize; + unsigned int comp, encrypted; + char *compbuf = NULL; + zio_cksum_t zc = bp->blk_cksum; + grub_err_t err; + + *buf = NULL; + + comp = BP_GET_COMPRESS(bp, endian); + encrypted = ((grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 60) & 3); + + grub_dprintf("zfs", "zio_read compress %d\n", (unsigned int) comp); + if (BP_IS_EMBEDDED(bp, endian)) { + if(BPE_GET_ETYPE(bp, endian) != BP_EMBEDDED_TYPE_DATA) { + return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, + "unsupported embedded BP (type=%u)\n", + (int) BPE_GET_ETYPE(bp, endian)); + } + lsize = BPE_GET_LSIZE(bp, endian); + psize = BPE_GET_PSIZE(bp, endian); + } else { + lsize = BP_GET_LSIZE(bp, endian); + psize = BP_GET_PSIZE(bp, endian); + } + + grub_dprintf("zfs", "zio_read: size %" PRIdGRUB_SSIZE "/%" + PRIdGRUB_SSIZE "\n", lsize, psize); + if (comp >= ZIO_COMPRESS_FUNCTIONS) + return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, + "compression algorithm %u not supported\n", (unsigned int) comp); + + if (comp != ZIO_COMPRESS_OFF && 
decomp_table[comp].decomp_func == NULL) return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, "compression algorithm %s not supported\n", decomp_table[comp].name); @@ -1841,23 +2432,25 @@ return grub_errno; } else - compbuf = *buf = grub_malloc (lsize); - - grub_dprintf ("zfs", "endian = %d\n", endian); - err = zio_read_data (bp, endian, compbuf, data); - if (err) { - grub_free (compbuf); - *buf = NULL; - return err; + compbuf = *buf = grub_malloc (lsize); + if (! compbuf) + return grub_errno; } - grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize); - err = zio_checksum_verify (zc, checksum, endian, - compbuf, psize); + if (size) + *size = lsize; + + grub_dprintf ("zfs", "endian = %d\n", endian); + if (BP_IS_EMBEDDED(bp, endian)) { + err = decode_embedded_bp_compressed(bp, endian, compbuf); + } else { + err = zio_read_data (bp, endian, compbuf, data); + grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize); + } + if (err) { - grub_dprintf ("zfs", "incorrect checksum\n"); grub_free (compbuf); *buf = NULL; return err; @@ -1950,23 +2543,13 @@ grub_zfs_endian_t endian; grub_err_t err = GRUB_ERR_NONE; - bp = grub_malloc (sizeof (blkptr_t)); - if (!bp) - return grub_errno; - endian = dn->endian; for (level = dn->dn.dn_nlevels - 1; level >= 0; level--) { grub_dprintf ("zfs", "endian = %d\n", endian); idx = (blkid >> (epbs * level)) & ((1 << epbs) - 1); - *bp = bp_array[idx]; - if (bp_array != dn->dn.dn_blkptr) - { - grub_free (bp_array); - bp_array = 0; - } - - if (BP_IS_HOLE (bp)) + bp = &bp_array[idx]; + if (BP_IS_HOLE (bp, endian)) { grub_size_t size = grub_zfs_to_cpu16 (dn->dn.dn_datablkszsec, dn->endian) @@ -1993,6 +2576,8 @@ endian = (grub_zfs_to_cpu64 (bp->blk_prop, endian) >> 63) & 1; if (err) break; + if (bp_array != dn->dn.dn_blkptr) + grub_free (bp_array); bp_array = tmpbuf; } if (bp_array != dn->dn.dn_blkptr) @@ -2000,7 +2585,6 @@ if (endian_out) *endian_out = endian; - grub_free (bp); return err; } @@ -2030,11 +2614,38 @@ return grub_error 
(GRUB_ERR_FILE_NOT_FOUND, N_("file `%s' not found"), name); } +/* + * mzap_value_search: find the micro ZAP entry whose value equals *value and copy its NUL-terminated name into name. + */ +static grub_err_t +mzap_value_search (mzap_phys_t *zapobj, grub_zfs_endian_t endian, + int objsize, char *name, grub_uint64_t *value) +{ + int i, chunks; + mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; + grub_uint64_t mze_val; + + chunks = objsize / MZAP_ENT_LEN - 1; + for (i = 0; i < chunks; i++) + { + mze_val = grub_zfs_to_cpu64 (mzap_ent[i].mze_value, endian); + if (mze_val == *value) + { + grub_memcpy (name, mzap_ent[i].mze_name, grub_strlen + (mzap_ent[i].mze_name) + 1); + return GRUB_ERR_NONE; + } + } + + return grub_error (GRUB_ERR_FILE_NOT_FOUND, + "mzap_value_search: couldn't find %" PRIuGRUB_UINT64_T, *value); +} + static int mzap_iterate (mzap_phys_t * zapobj, grub_zfs_endian_t endian, int objsize, - int (*hook) (const char *name, grub_uint64_t val, - struct grub_zfs_dir_ctx *ctx), - struct grub_zfs_dir_ctx *ctx) + int (*hook) (const char *name, grub_uint64_t val, + struct grub_zfs_dir_ctx *ctx), + struct grub_zfs_dir_ctx *ctx) { int i, chunks; mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; @@ -2088,7 +2699,7 @@ */ crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); - return crc; + return (crc); } /* @@ -2110,7 +2721,7 @@ while (n--) { if (grub_toupper (*t1) != grub_toupper (*t2)) - return (int) grub_toupper (*t1) - (int) grub_toupper (*t2); + return (int) grub_toupper (*t1) - (int) grub_toupper (*t2); t1++; t2++; @@ -2298,9 +2909,8 @@ fzap_iterate (dnode_end_t * zap_dnode, zap_phys_t * zap, grub_size_t name_elem_length, int (*hook) (const void *name, grub_size_t name_length, - const void *val_in, - grub_size_t nelem, grub_size_t elemsize, - void *data), + const void *val_in, grub_size_t nelem, + grub_size_t elemsize, void *data), void *hook_data, struct grub_zfs_data *data) { zap_leaf_phys_t *l; @@ -2331,11 +2941,11 @@ for (idx = 0; idx < (1ULL << zap->zap_ptrtbl.zt_shift); idx++) { blkid = grub_zfs_to_cpu64 (((grub_uint64_t *) zap)[idx + (1 <<
(blksft - 3 - 1))], - zap_dnode->endian); + zap_dnode->endian); for (idx2 = 0; idx2 < idx; idx2++) if (blkid == grub_zfs_to_cpu64 (((grub_uint64_t *) zap)[idx2 + (1 << (blksft - 3 - 1))], - zap_dnode->endian)) + zap_dnode->endian)) break; if (idx2 != idx) continue; @@ -2361,55 +2971,55 @@ } for (chunk = 0; chunk < ZAP_LEAF_NUMCHUNKS (blksft); chunk++) - { - char *buf; - struct zap_leaf_entry *le; - char *val; - grub_size_t val_length; - le = ZAP_LEAF_ENTRY (l, blksft, chunk); - - /* Verify the chunk entry */ - if (le->le_type != ZAP_CHUNK_ENTRY) - continue; + { + char *buf; + struct zap_leaf_entry *le; + char *val; + grub_size_t val_length; + le = ZAP_LEAF_ENTRY (l, blksft, chunk); - buf = grub_malloc (grub_zfs_to_cpu16 (le->le_name_length, endian) - * name_elem_length + 1); - if (zap_leaf_array_get (l, endian, blksft, - grub_zfs_to_cpu16 (le->le_name_chunk, - endian), - grub_zfs_to_cpu16 (le->le_name_length, - endian) - * name_elem_length, buf)) - { - grub_free (buf); + /* Verify the chunk entry */ + if (le->le_type != ZAP_CHUNK_ENTRY) continue; - } - buf[le->le_name_length * name_elem_length] = 0; - val_length = ((int) le->le_value_length - * (int) le->le_int_size); - val = grub_malloc (grub_zfs_to_cpu16 (val_length, endian)); - if (zap_leaf_array_get (l, endian, blksft, - grub_zfs_to_cpu16 (le->le_value_chunk, - endian), - val_length, val)) - { - grub_free (buf); - grub_free (val); - continue; - } + buf = grub_malloc (grub_zfs_to_cpu16 (le->le_name_length, endian) + * name_elem_length + 1); + if (zap_leaf_array_get (l, endian, blksft, + grub_zfs_to_cpu16 (le->le_name_chunk, + endian), + grub_zfs_to_cpu16 (le->le_name_length, + endian) + * name_elem_length, buf)) + { + grub_free (buf); + continue; + } + buf[le->le_name_length * name_elem_length] = 0; - if (hook (buf, le->le_name_length, - val, le->le_value_length, le->le_int_size, hook_data)) - { - grub_free (l); - return 1; - } - grub_free (buf); - grub_free (val); - } - grub_free (l); - } + val_length = ((int) 
le->le_value_length + * (int) le->le_int_size); + val = grub_malloc (grub_zfs_to_cpu16 (val_length, endian)); + if (zap_leaf_array_get (l, endian, blksft, + grub_zfs_to_cpu16 (le->le_value_chunk, + endian), + val_length, val)) + { + grub_free (buf); + grub_free (val); + continue; + } + + if (hook (buf, le->le_name_length, + val, le->le_value_length, le->le_int_size, hook_data)) + { + grub_free (l); + return 1; + } + grub_free (buf); + grub_free (val); + } + grub_free (l); + } return 0; } @@ -2463,116 +3073,154 @@ return grub_error (GRUB_ERR_BAD_FS, "unknown ZAP type"); } -/* Context for zap_iterate_u64. */ -struct zap_iterate_u64_ctx -{ - int (*hook) (const char *, grub_uint64_t, struct grub_zfs_dir_ctx *); - struct grub_zfs_dir_ctx *dir_ctx; -}; - -/* Helper for zap_iterate_u64. */ -static int -zap_iterate_u64_transform (const void *name, - grub_size_t namelen __attribute__ ((unused)), - const void *val_in, - grub_size_t nelem, - grub_size_t elemsize, - void *data) -{ - struct zap_iterate_u64_ctx *ctx = data; - - if (elemsize != sizeof (grub_uint64_t) || nelem != 1) - return 0; - return ctx->hook (name, grub_be_to_cpu64 (*(const grub_uint64_t *) val_in), - ctx->dir_ctx); -} - -static int -zap_iterate_u64 (dnode_end_t * zap_dnode, - int (*hook) (const char *name, grub_uint64_t val, - struct grub_zfs_dir_ctx *ctx), - struct grub_zfs_data *data, struct grub_zfs_dir_ctx *ctx) +/* + * Read in the data of a zap object and find the property name for a + * matching value. + * + */ +static grub_err_t +zap_value_search (dnode_end_t *zap_dnode, char *name, grub_uint64_t *val, + struct grub_zfs_data *data) { grub_uint64_t block_type; int size; void *zapbuf; grub_err_t err; - int ret; grub_zfs_endian_t endian; + grub_dprintf ("zfs", "zap_value_search: looking for '%lld'\n", (unsigned long long)*val); + /* Read in the first block of the zap object data. 
*/ - size = grub_zfs_to_cpu16 (zap_dnode->dn.dn_datablkszsec, zap_dnode->endian) << SPA_MINBLOCKSHIFT; + size = grub_zfs_to_cpu16 (zap_dnode->dn.dn_datablkszsec, + zap_dnode->endian) << SPA_MINBLOCKSHIFT; err = dmu_read (zap_dnode, 0, &zapbuf, &endian, data); if (err) - return 0; + return err; block_type = grub_zfs_to_cpu64 (*((grub_uint64_t *) zapbuf), endian); - grub_dprintf ("zfs", "zap iterate\n"); + grub_dprintf ("zfs", "zap_value_search: zap read\n"); if (block_type == ZBT_MICRO) { - grub_dprintf ("zfs", "micro zap\n"); - ret = mzap_iterate (zapbuf, endian, size, hook, ctx); + grub_dprintf ("zfs", "zap_value_search: micro zap value search\n"); + err = (mzap_value_search (zapbuf, endian, size, name, val)); + grub_dprintf ("zfs", "zap_value_search: returned %d\n", err); grub_free (zapbuf); - return ret; + return err; } else if (block_type == ZBT_HEADER) { - struct zap_iterate_u64_ctx transform_ctx = { - .hook = hook, - .dir_ctx = ctx - }; - - grub_dprintf ("zfs", "fat zap\n"); /* this is a fat zap */ - ret = fzap_iterate (zap_dnode, zapbuf, 1, - zap_iterate_u64_transform, &transform_ctx, data); + grub_dprintf ("zfs", "fat zap value search not supported\n"); grub_free (zapbuf); - return ret; + return grub_error (GRUB_ERR_BAD_FS, "fat zap value search not supported"); } - grub_error (GRUB_ERR_BAD_FS, "unknown ZAP type"); - return 0; + grub_free (zapbuf); /* neither micro nor fat: still release the block read above */ + return grub_error (GRUB_ERR_BAD_FS, "zap_value_search: unknown ZAP type"); } -static int -zap_iterate (dnode_end_t * zap_dnode, - grub_size_t nameelemlen, - int (*hook) (const void *name, grub_size_t namelen, - const void *val_in, - grub_size_t nelem, grub_size_t elemsize, - void *data), - void *hook_data, struct grub_zfs_data *data) +/* Context for zap_iterate_u64.
*/ +struct zap_iterate_u64_ctx { - grub_uint64_t block_type; - void *zapbuf; - grub_err_t err; - int ret; - grub_zfs_endian_t endian; + int (*hook) (const char *, grub_uint64_t, struct grub_zfs_dir_ctx *); + struct grub_zfs_dir_ctx *dir_ctx; +}; - /* Read in the first block of the zap object data. */ - err = dmu_read (zap_dnode, 0, &zapbuf, &endian, data); - if (err) - return 0; - block_type = grub_zfs_to_cpu64 (*((grub_uint64_t *) zapbuf), endian); +/* Helper for zap_iterate_u64. */ +static int +zap_iterate_u64_transform (const void *name, + grub_size_t namelen __attribute__ ((unused)), + const void *val_in, + grub_size_t nelem, + grub_size_t elemsize, + void *data) +{ + struct zap_iterate_u64_ctx *ctx = data; + + if (elemsize != sizeof (grub_uint64_t) || nelem != 1) + return 0; + return ctx->hook(name, + grub_be_to_cpu64 (*(const grub_uint64_t *) val_in), ctx->dir_ctx); +} - grub_dprintf ("zfs", "zap iterate\n"); +static int +zap_iterate_u64 (dnode_end_t * zap_dnode, + int (*hook) (const char *name, grub_uint64_t val, + struct grub_zfs_dir_ctx *ctx), + struct grub_zfs_data *data, struct grub_zfs_dir_ctx *ctx) +{ + grub_uint64_t block_type; + int size; + void *zapbuf; + grub_err_t err; + int ret; + grub_zfs_endian_t endian; + + /* Read in the first block of the zap object data. 
*/ + size = grub_zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec, + zap_dnode->endian) << SPA_MINBLOCKSHIFT; + err = dmu_read (zap_dnode, 0, &zapbuf, &endian, data); + if (err) + return 0; + block_type = grub_zfs_to_cpu64 (*((grub_uint64_t *) zapbuf), endian); + + grub_dprintf ("zfs", "zap iterate\n"); + + if (block_type == ZBT_MICRO) { + grub_dprintf ("zfs", "micro zap\n"); + ret = mzap_iterate (zapbuf, endian, size, hook, ctx); + grub_free (zapbuf); + return ret; + } else if (block_type == ZBT_HEADER) { + struct zap_iterate_u64_ctx transform_ctx = { + .hook = hook, + .dir_ctx = ctx + }; + grub_dprintf ("zfs", "fat zap\n"); + /* this is a fat zap */ + ret = fzap_iterate (zap_dnode, zapbuf, 1, + zap_iterate_u64_transform, &transform_ctx, data); + grub_free (zapbuf); + return ret; + } + grub_error (GRUB_ERR_BAD_FS, "unknown ZAP type"); + return 0; +} - if (block_type == ZBT_MICRO) - { - grub_error (GRUB_ERR_BAD_FS, "micro ZAP where FAT ZAP expected"); - return 0; - } - if (block_type == ZBT_HEADER) - { - grub_dprintf ("zfs", "fat zap\n"); - /* this is a fat zap */ - ret = fzap_iterate (zap_dnode, zapbuf, nameelemlen, hook, hook_data, - data); - grub_free (zapbuf); - return ret; - } - grub_error (GRUB_ERR_BAD_FS, "unknown ZAP type"); - return 0; +static int +zap_iterate (dnode_end_t * zap_dnode, grub_size_t nameelemlen, + int (*hook) (const void *name, grub_size_t namelen, const void *val_in, + grub_size_t nelem, grub_size_t elemsize, void *data), + void *hook_data, struct grub_zfs_data *data) +{ + grub_uint64_t block_type; + void *zapbuf; + grub_err_t err; + int ret; + grub_zfs_endian_t endian; + + /* Read in the first block of the zap object data. 
*/ + err = dmu_read(zap_dnode, 0, &zapbuf, &endian, data); + if (err) + return 0; + block_type = grub_zfs_to_cpu64(*((grub_uint64_t *) zapbuf), endian); + + grub_dprintf("zfs", "zap iterate\n"); + + if (block_type == ZBT_MICRO) { + grub_error(GRUB_ERR_BAD_FS, "micro ZAP where FAT ZAP expected"); + return 0; + } + if (block_type == ZBT_HEADER) { + grub_dprintf ("zfs", "fat zap\n"); + /* this is a fat zap */ + ret = fzap_iterate(zap_dnode, zapbuf, nameelemlen, hook, + hook_data, data); + grub_free(zapbuf); + return ret; + } + grub_error(GRUB_ERR_BAD_FS, "unknown ZAP type"); + return 0; } @@ -2608,7 +3256,7 @@ grub_memmove (&(buf->dn), &(data->dnode_buf)[idx], DNODE_SIZE); buf->endian = data->dnode_endian; if (type && buf->dn.dn_type != type) - return grub_error(GRUB_ERR_BAD_FS, "incorrect dnode type"); + return grub_error(GRUB_ERR_BAD_FS, "[1] incorrect dnode type: %d != %d\n", buf->dn.dn_type, type); return GRUB_ERR_NONE; } @@ -2620,6 +3268,7 @@ grub_dprintf ("zfs", "alive\n"); grub_free (data->dnode_buf); + data->dnode_buf = NULL; grub_free (data->dnode_mdn); data->dnode_mdn = grub_malloc (sizeof (*mdn)); if (! 
data->dnode_mdn) @@ -2639,7 +3288,7 @@ grub_memmove (&(buf->dn), (dnode_phys_t *) dnbuf + idx, DNODE_SIZE); buf->endian = endian; if (type && buf->dn.dn_type != type) - return grub_error(GRUB_ERR_BAD_FS, "incorrect dnode type"); + return grub_error(GRUB_ERR_BAD_FS, "[2] incorrect dnode type: %d != %d\n", buf->dn.dn_type, type); return GRUB_ERR_NONE; } @@ -2801,7 +3450,7 @@ grub_size_t block; grub_size_t blksz; blksz = (grub_zfs_to_cpu16 (dnode_path->dn.dn.dn_datablkszsec, - dnode_path->dn.endian) + dnode_path->dn.endian) << SPA_MINBLOCKSHIFT); sym_value = grub_malloc (sym_sz); @@ -2846,11 +3495,11 @@ grub_free (dn_new); } else while (dnode_path != root) - { - dn_new = dnode_path; - dnode_path = dn_new->next; - grub_free (dn_new); - } + { + dn_new = dnode_path; + dnode_path = dn_new->next; + grub_free (dn_new); + } } if (dnode_path->dn.dn.dn_bonustype == DMU_OT_SA) { @@ -2907,11 +3556,11 @@ grub_free (dn_new); } else while (dnode_path != root) - { - dn_new = dnode_path; - dnode_path = dn_new->next; - grub_free (dn_new); - } + { + dn_new = dnode_path; + dnode_path = dn_new->next; + grub_free (dn_new); + } } } } @@ -2929,57 +3578,146 @@ return err; } -#if 0 +/* + * Get the default 'bootfs' dataset name using rootfs object number + * + */ +static grub_err_t +get_default_bootfsname (dnode_end_t * mosmdn, grub_uint64_t bootfsobj, + struct grub_zfs_data *data, char **bootfsname) +{ + dnode_end_t dn; + dnode_end_t mdn; + grub_uint64_t dirobj; + grub_uint64_t parentobj; + grub_uint64_t childobj; + grub_uint64_t rootobj; + grub_size_t buf_size; + grub_err_t err = GRUB_ERR_NONE; + char *bootfs; + + *bootfsname = 0; + if ((grub_errno = dnode_get (mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, &mdn, data))) + return (grub_errno); + + err = zap_lookup (&mdn, DMU_POOL_ROOT_DATASET, &rootobj, data, 0); + if (err) + return err; + + if ((grub_errno = dnode_get (mosmdn, bootfsobj, DMU_OT_DSL_DATASET, &dn, data))) + return (grub_errno); + + dirobj = grub_zfs_to_cpu64 
(((dsl_dataset_phys_t *) + DN_BONUS (&dn.dn))->ds_dir_obj, dn.endian); + + buf_size = BOOTFSNAME_SIZE; + bootfs = grub_zalloc(buf_size); + do + { + if ((grub_errno = dnode_get (mosmdn, dirobj, DMU_OT_DSL_DIR, &dn, data))) + { + grub_free (bootfs); + return (grub_errno); + } + + parentobj = grub_zfs_to_cpu64 ((((dsl_dir_phys_t *) + DN_BONUS (&dn.dn)))->dd_parent_obj, dn.endian); + + if ((grub_errno = dnode_get (mosmdn, parentobj, DMU_OT_DSL_DIR, &dn, data))) + { + grub_free (bootfs); + return (grub_errno); + } + + childobj = grub_zfs_to_cpu64 ((((dsl_dir_phys_t *) + DN_BONUS (&dn.dn)))->dd_child_dir_zapobj, dn.endian); + + if ((grub_errno = dnode_get (mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, &dn, data))) + { + grub_free (bootfs); + return (grub_errno); + } + + char cname[64]; + grub_memset (cname, 0, sizeof(cname)); + + if (zap_value_search (&dn, cname, &dirobj, data)) + { + grub_free (bootfs); + return (GRUB_ERR_BAD_FS); + } + + grub_size_t cname_len = grub_strlen(cname); + cname[cname_len++] = '/'; + + grub_size_t bootfs_len= grub_strlen(bootfs); + grub_size_t expected_len = bootfs_len + cname_len; + if (expected_len >= buf_size) + { + while((buf_size = buf_size * 2) < expected_len); + char *tmp = grub_realloc (bootfs, buf_size); + if (! tmp) + { + grub_free (bootfs); + return (grub_errno); + } + bootfs = tmp; + grub_memset((bootfs + bootfs_len), 0, (buf_size - bootfs_len)); + } + + /* create space for parent dataset name */ + grub_memmove((bootfs + cname_len), bootfs, bootfs_len); + grub_memmove(bootfs, cname, cname_len); + + } while ((dirobj = parentobj) != rootobj); + + /* remove trailing slash */ + *(bootfs + grub_strlen(bootfs) - 1) = '\0'; + *bootfsname = bootfs; + + grub_dprintf ("zfs", "get_default_bootfsname: %s\n", *bootfsname); + return (0); +} + /* * Get the default 'bootfs' property value from the rootpool. 
* */ static grub_err_t -get_default_bootfsobj (dnode_phys_t * mosmdn, grub_uint64_t * obj, +get_default_bootfsobj (dnode_end_t * mosmdn, grub_uint64_t * obj, struct grub_zfs_data *data) { grub_uint64_t objnum = 0; - dnode_phys_t *dn; - if (!dn) - return grub_errno; + dnode_end_t dn; + grub_dprintf ("zfs", "get_default_bootfsobj called\n"); if ((grub_errno = dnode_get (mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, dn, data))) - { - grub_free (dn); + DMU_OT_OBJECT_DIRECTORY, &dn, data))) return (grub_errno); - } /* * find the object number for 'pool_props', and get the dnode * of the 'pool_props'. */ - if (zap_lookup (dn, DMU_POOL_PROPS, &objnum, data)) - { - grub_free (dn); + if (zap_lookup (&dn, DMU_POOL_PROPS, &objnum, data, 0)) return (GRUB_ERR_BAD_FS); - } - if ((grub_errno = dnode_get (mosmdn, objnum, DMU_OT_POOL_PROPS, dn, data))) - { - grub_free (dn); + + if ((grub_errno = dnode_get (mosmdn, objnum, DMU_OT_POOL_PROPS, &dn, data))) return (grub_errno); - } - if (zap_lookup (dn, ZPOOL_PROP_BOOTFS, &objnum, data)) - { - grub_free (dn); + + if (zap_lookup (&dn, ZPOOL_PROP_BOOTFS, &objnum, data, 0)) return (GRUB_ERR_BAD_FS); - } if (!objnum) - { - grub_free (dn); return (GRUB_ERR_BAD_FS); - } *obj = objnum; + return (0); } -#endif + /* * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), * e.g. pool/rootfs, or a given object number (obj), e.g. the object number @@ -3036,6 +3774,9 @@ *fsname = 0; childobj = grub_zfs_to_cpu64 ((((dsl_dir_phys_t *) DN_BONUS (&mdn->dn)))->dd_child_dir_zapobj, mdn->endian); + if (childobj == 0) + return grub_error(GRUB_ERR_BAD_FS, "file system not found"); + err = dnode_get (mosmdn, childobj, DMU_OT_DSL_DIR_CHILD_MAP, mdn, data); if (err) @@ -3084,261 +3825,498 @@ /* Context for dnode_get_fullpath. */ struct dnode_get_fullpath_ctx { - struct subvolume *subvol; - grub_uint64_t salt; - int keyn; + struct subvolume *subvol; + grub_uint64_t salt; + int keyn; }; /* Helper for dnode_get_fullpath. 
*/ static int count_zap_keys (const void *name __attribute__ ((unused)), - grub_size_t namelen __attribute__ ((unused)), - const void *val_in __attribute__ ((unused)), - grub_size_t nelem __attribute__ ((unused)), - grub_size_t elemsize __attribute__ ((unused)), - void *data) + grub_size_t namelen __attribute__ ((unused)), + const void *val_in __attribute__ ((unused)), + grub_size_t nelem __attribute__ ((unused)), + grub_size_t elemsize __attribute__ ((unused)), + void *data) { - struct dnode_get_fullpath_ctx *ctx = data; + struct dnode_get_fullpath_ctx *ctx = data; - ctx->subvol->nkeys++; - return 0; + ctx->subvol->nkeys++; + return 0; } -/* Helper for dnode_get_fullpath. */ +/* Helper for dnode_get_fullpath. */ static int load_zap_key (const void *name, grub_size_t namelen, const void *val_in, - grub_size_t nelem, grub_size_t elemsize, void *data) + grub_size_t nelem, grub_size_t elemsize, void *data) { - struct dnode_get_fullpath_ctx *ctx = data; + struct dnode_get_fullpath_ctx *ctx = data; - if (namelen != 1) - { - grub_dprintf ("zfs", "Unexpected key index size %" PRIuGRUB_SIZE "\n", - namelen); - return 0; - } + if (namelen != 1) { + grub_dprintf("zfs", + "Unexpected key index size %" PRIuGRUB_SIZE "\n", namelen); + return 0; + } - if (elemsize != 1) - { - grub_dprintf ("zfs", "Unexpected key element size %" PRIuGRUB_SIZE "\n", + if (elemsize != 1) { + grub_dprintf("zfs", + "Unexpected key element size %" PRIuGRUB_SIZE "\n", elemsize); - return 0; - } + return 0; + } - ctx->subvol->keyring[ctx->keyn].txg = - grub_be_to_cpu64 (*(grub_uint64_t *) name); - ctx->subvol->keyring[ctx->keyn].algo = - grub_le_to_cpu64 (*(grub_uint64_t *) val_in); - ctx->subvol->keyring[ctx->keyn].cipher = - grub_zfs_load_key (val_in, nelem, ctx->salt, - ctx->subvol->keyring[ctx->keyn].algo); - ctx->keyn++; - return 0; + ctx->subvol->keyring[ctx->keyn].txg = + grub_be_to_cpu64(*(grub_uint64_t *) name); + ctx->subvol->keyring[ctx->keyn].algo = + grub_le_to_cpu64(*(grub_uint64_t *) 
val_in); + ctx->subvol->keyring[ctx->keyn].cipher = + grub_zfs_load_key(val_in, nelem, ctx->salt, + ctx->subvol->keyring[ctx->keyn].algo); + ctx->keyn++; + return 0; } static grub_err_t dnode_get_fullpath (const char *fullpath, struct subvolume *subvol, - dnode_end_t * dn, int *isfs, - struct grub_zfs_data *data) + dnode_end_t *dn, int *isfs, struct grub_zfs_data *data) { - char *fsname, *snapname; - const char *ptr_at, *filename; - grub_uint64_t headobj; - grub_uint64_t keychainobj; - grub_err_t err; - - ptr_at = grub_strchr (fullpath, '@'); - if (! ptr_at) - { - *isfs = 1; - filename = 0; - snapname = 0; - fsname = grub_strdup (fullpath); - } - else - { - const char *ptr_slash = grub_strchr (ptr_at, '/'); - - *isfs = 0; - fsname = grub_malloc (ptr_at - fullpath + 1); - if (!fsname) - return grub_errno; - grub_memcpy (fsname, fullpath, ptr_at - fullpath); - fsname[ptr_at - fullpath] = 0; - if (ptr_at[1] && ptr_at[1] != '/') - { - snapname = grub_malloc (ptr_slash - ptr_at); - if (!snapname) - { - grub_free (fsname); - return grub_errno; - } - grub_memcpy (snapname, ptr_at + 1, ptr_slash - ptr_at - 1); - snapname[ptr_slash - ptr_at - 1] = 0; - } - else - snapname = 0; - if (ptr_slash) - filename = ptr_slash; - else - filename = "/"; - grub_dprintf ("zfs", "fsname = '%s' snapname='%s' filename = '%s'\n", + char *fsname, *snapname; + const char *ptr_at, *filename; + grub_uint64_t headobj; + grub_uint64_t keychainobj; + grub_err_t err; + + ptr_at = grub_strchr(fullpath, '@'); + if (!ptr_at) { + *isfs = 1; + filename = 0; + snapname = 0; + fsname = grub_strdup(fullpath); + } else { + const char *ptr_slash = grub_strchr(ptr_at, '/'); + + *isfs = 0; + fsname = grub_malloc(ptr_at - fullpath + 1); + if (!fsname) + return grub_errno; + grub_memcpy(fsname, fullpath, ptr_at - fullpath); + fsname[ptr_at - fullpath] = 0; + if (ptr_at[1] && ptr_at[1] != '/') { + snapname = grub_malloc(ptr_slash - ptr_at); + if (!snapname) { + grub_free(fsname); + return grub_errno; + } + 
grub_memcpy(snapname, ptr_at + 1, + ptr_slash - ptr_at - 1); + snapname[ptr_slash - ptr_at - 1] = 0; + } else + snapname = 0; + if (ptr_slash) + filename = ptr_slash; + else + filename = "/"; + grub_dprintf("zfs", + "fsname = '%s' snapname='%s' filename = '%s'\n", fsname, snapname, filename); - } - grub_dprintf ("zfs", "alive\n"); - err = get_filesystem_dnode (&(data->mos), fsname, dn, data); - if (err) - { - grub_free (fsname); - grub_free (snapname); - return err; - } - - grub_dprintf ("zfs", "alive\n"); + } + grub_dprintf ("zfs", "alive\n"); + err = get_filesystem_dnode(&(data->mos), fsname, dn, data); + if (err) { + grub_free(fsname); + grub_free(snapname); + return err; + } - headobj = grub_zfs_to_cpu64 (((dsl_dir_phys_t *) DN_BONUS (&dn->dn))->dd_head_dataset_obj, dn->endian); + grub_dprintf("zfs", "alive\n"); - grub_dprintf ("zfs", "endian = %d\n", subvol->mdn.endian); + headobj = grub_zfs_to_cpu64( + ((dsl_dir_phys_t *) DN_BONUS(&dn->dn))->dd_head_dataset_obj, + dn->endian); + + grub_dprintf("zfs", "endian = %d\n", subvol->mdn.endian); + + err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, + &subvol->mdn, data); + if (err) { + grub_free(fsname); + grub_free(snapname); + return err; + } + grub_dprintf ("zfs", "endian = %d\n", subvol->mdn.endian); - err = dnode_get (&(data->mos), headobj, DMU_OT_DSL_DATASET, &subvol->mdn, - data); - if (err) - { - grub_free (fsname); - grub_free (snapname); - return err; - } - grub_dprintf ("zfs", "endian = %d\n", subvol->mdn.endian); + keychainobj = grub_zfs_to_cpu64 ( + ((dsl_dir_phys_t *) DN_BONUS(&dn->dn))->keychain, dn->endian); + if (grub_zfs_load_key && keychainobj) { + struct dnode_get_fullpath_ctx ctx = { + .subvol = subvol, + .keyn = 0 + }; + dnode_end_t keychain_dn, props_dn; + grub_uint64_t propsobj; + propsobj = grub_zfs_to_cpu64( + ((dsl_dir_phys_t *) DN_BONUS(&dn->dn))->dd_props_zapobj, + dn->endian); + + err = dnode_get(&(data->mos), propsobj, DMU_OT_DSL_PROPS, + &props_dn, data); + if (err) { + 
grub_free(fsname); + grub_free(snapname); + return err; + } - keychainobj = grub_zfs_to_cpu64 (((dsl_dir_phys_t *) DN_BONUS (&dn->dn))->keychain, dn->endian); - if (grub_zfs_load_key && keychainobj) - { - struct dnode_get_fullpath_ctx ctx = { - .subvol = subvol, - .keyn = 0 - }; - dnode_end_t keychain_dn, props_dn; - grub_uint64_t propsobj; - propsobj = grub_zfs_to_cpu64 (((dsl_dir_phys_t *) DN_BONUS (&dn->dn))->dd_props_zapobj, dn->endian); + err = zap_lookup (&props_dn, "salt", &ctx.salt, data, 0); + if (err == GRUB_ERR_FILE_NOT_FOUND) { + err = 0; + grub_errno = 0; + ctx.salt = 0; + } + if (err) { + grub_dprintf("zfs", "failed here\n"); + return err; + } - err = dnode_get (&(data->mos), propsobj, DMU_OT_DSL_PROPS, - &props_dn, data); - if (err) - { - grub_free (fsname); - grub_free (snapname); - return err; + err = dnode_get(&(data->mos), keychainobj, DMU_OT_DSL_KEYCHAIN, + &keychain_dn, data); + if (err) { + grub_free(fsname); + grub_free(snapname); + return err; + } + subvol->nkeys = 0; + zap_iterate(&keychain_dn, 8, count_zap_keys, &ctx, data); + subvol->keyring = grub_zalloc(subvol->nkeys * + sizeof (subvol->keyring[0])); + if (!subvol->keyring) { + grub_free(fsname); + grub_free(snapname); + return err; + } + zap_iterate(&keychain_dn, 8, load_zap_key, &ctx, data); } - err = zap_lookup (&props_dn, "salt", &ctx.salt, data, 0); - if (err == GRUB_ERR_FILE_NOT_FOUND) - { - err = 0; - grub_errno = 0; - ctx.salt = 0; - } - if (err) - { - grub_dprintf ("zfs", "failed here\n"); - return err; - } + if (snapname) { + grub_uint64_t snapobj; - err = dnode_get (&(data->mos), keychainobj, DMU_OT_DSL_KEYCHAIN, - &keychain_dn, data); - if (err) - { - grub_free (fsname); - grub_free (snapname); - return err; - } - subvol->nkeys = 0; - zap_iterate (&keychain_dn, 8, count_zap_keys, &ctx, data); - subvol->keyring = grub_zalloc (subvol->nkeys * sizeof (subvol->keyring[0])); - if (!subvol->keyring) - { - grub_free (fsname); - grub_free (snapname); - return err; + snapobj = 
grub_zfs_to_cpu64( + ((dsl_dataset_phys_t *) + DN_BONUS(&subvol->mdn.dn))->ds_snapnames_zapobj, + subvol->mdn.endian); + + err = dnode_get(&(data->mos), snapobj, + DMU_OT_DSL_DS_SNAP_MAP, &subvol->mdn, data); + if (!err) + err = zap_lookup(&subvol->mdn, snapname, + &headobj, data, 0); + if (!err) + err = dnode_get(&(data->mos), headobj, + DMU_OT_DSL_DATASET, &subvol->mdn, data); + if (err) { + grub_free(fsname); + grub_free(snapname); + return err; + } } - zap_iterate (&keychain_dn, 8, load_zap_key, &ctx, data); - } - - if (snapname) - { - grub_uint64_t snapobj; - - snapobj = grub_zfs_to_cpu64 (((dsl_dataset_phys_t *) DN_BONUS (&subvol->mdn.dn))->ds_snapnames_zapobj, subvol->mdn.endian); - err = dnode_get (&(data->mos), snapobj, - DMU_OT_DSL_DS_SNAP_MAP, &subvol->mdn, data); - if (!err) - err = zap_lookup (&subvol->mdn, snapname, &headobj, data, 0); - if (!err) - err = dnode_get (&(data->mos), headobj, DMU_OT_DSL_DATASET, - &subvol->mdn, data); - if (err) - { - grub_free (fsname); - grub_free (snapname); - return err; - } - } + subvol->obj = headobj; - subvol->obj = headobj; + make_mdn(&subvol->mdn, data); - make_mdn (&subvol->mdn, data); - - grub_dprintf ("zfs", "endian = %d\n", subvol->mdn.endian); + grub_dprintf("zfs", "endian = %d\n", subvol->mdn.endian); - if (*isfs) - { - grub_free (fsname); - grub_free (snapname); - return GRUB_ERR_NONE; - } - err = dnode_get_path (subvol, filename, dn, data); - grub_free (fsname); - grub_free (snapname); - return err; + if (*isfs) { + grub_free(fsname); + grub_free(snapname); + return GRUB_ERR_NONE; + } + err = dnode_get_path(subvol, filename, dn, data); + grub_free(fsname); + grub_free(snapname); + return err; } -static int -nvlist_find_value (const char *nvlist_in, const char *name, - int valtype, char **val, - grub_size_t *size_out, grub_size_t *nelm_out) +/* + * Checks whether the MOS features that are active are supported by this + * (GRUB's) implementation of ZFS. + * + * Return: + * 0: Success. + * errnum: Failure. 
+ */ +static grub_err_t +check_mos_features(dnode_end_t *mosmdn, struct grub_zfs_data *data) { - grub_size_t nvp_name_len, name_len = grub_strlen(name); - int type; - const char *nvpair=NULL,*nvlist=nvlist_in; - char *nvp_name; + dnode_end_t dn; + grub_uint64_t objnum; + grub_err_t errnum = GRUB_ERR_NONE; + + if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, &dn, data)) != 0) + return (errnum); + + /* + * Find the object number for 'features_for_read' and retrieve its + * corresponding dnode. Note that we don't check features_for_write + * because GRUB is not opening the pool for write. + */ + errnum = zap_lookup(&dn, DMU_POOL_FEATURES_FOR_READ, &objnum, data, 0); + if(errnum != 0) { + /* this pool does not support features */ + if(errnum == GRUB_ERR_FILE_NOT_FOUND) + errnum = GRUB_ERR_NONE; + + return (errnum); + } + + if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, + &dn, data)) != 0) + return (errnum); - /* Verify if the 1st and 2nd byte in the nvlist are valid. */ - /* NOTE: independently of what endianness header announces all - subsequent values are big-endian. */ - if (nvlist[0] != NV_ENCODE_XDR || (nvlist[1] != NV_LITTLE_ENDIAN - && nvlist[1] != NV_BIG_ENDIAN)) - { - grub_dprintf ("zfs", "incorrect nvlist header\n"); - grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); - return 0; - } + return (zap_iterate_u64(&dn, check_feature, data, NULL)); +} - /* - * Loop thru the nvpair list - * The XDR representation of an integer is in big-endian byte order. - */ - while ((nvpair=nvlist_next_nvpair(nvlist,nvpair))) - { - nvpair_name(nvpair,&nvp_name, &nvp_name_len); - type = nvpair_type(nvpair); - if (type == valtype - && (nvp_name_len == name_len - || (nvp_name_len > name_len && nvp_name[name_len] == '\0')) - && grub_memcmp (nvp_name, name, name_len) == 0) - { - return nvpair_value(nvpair,val,size_out,nelm_out); - } - } +/* + * For a given XDR packed nvlist, verify the first 4 bytes and move on. 
+ * + * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : + * + * encoding method/host endian (4 bytes) + * nvl_version (4 bytes) + * nvl_nvflag (4 bytes) + * encoded nvpairs: + * encoded size of the nvpair (4 bytes) + * decoded size of the nvpair (4 bytes) + * name string size (4 bytes) + * name string data (sizeof(NV_ALIGN4(string)) + * data type (4 bytes) + * # of elements in the nvpair (4 bytes) + * data + * 2 zero's for the last nvpair + * (end of the entire list) (8 bytes) + * + */ + +/* + * The nvlist_next_nvpair() function returns a handle to the next nvpair in the + * list following nvpair. If nvpair is NULL, the first pair is returned. If + * nvpair is the last pair in the nvlist, NULL is returned. + */ +static const char * +nvlist_next_nvpair (const char *nvl, const char *nvpair) +{ + const char *nvp; + int encode_size; + int name_len; + if (nvl == NULL) + return NULL; + + if (nvpair == NULL) + { + /* skip over header, nvl_version and nvl_nvflag */ + nvpair = nvl + 4 * 3; + } + else + { + /* skip to the next nvpair */ + encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvpair)); + nvpair += encode_size; + /*If encode_size equals 0 nvlist_next_nvpair would return + * the same pair received in input, leading to an infinite loop. + * If encode_size is less than 0, this will move the pointer + * backwards, *possibly* examinining two times the same nvpair + * and potentially getting into an infinite loop. 
*/ + if(encode_size <= 0) + { + grub_dprintf ("zfs", "nvpair with size <= 0\n"); + grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); + return NULL; + } + } + /* 8 bytes of 0 marks the end of the list */ + if (grub_get_unaligned64 (nvpair) == 0) + return NULL; + /*consistency checks*/ + if (nvpair + 4 * 3 >= nvl + VDEV_PHYS_SIZE) + { + grub_dprintf ("zfs", "nvlist overflow\n"); + grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); + return NULL; + } + encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvpair)); + + nvp = nvpair + 4*2; + name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + nvp += 4; + + nvp = nvp + ((name_len + 3) & ~3); /* align */ + if (nvp + 4 >= nvl + VDEV_PHYS_SIZE + || encode_size < 0 + || nvp + 4 + encode_size > nvl + VDEV_PHYS_SIZE) + { + grub_dprintf ("zfs", "nvlist overflow\n"); + grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); + return NULL; + } + /* end consistency checks */ + + return nvpair; +} + +/* + * This function returns 0 on success and 1 on failure. On success, a string + * containing the name of nvpair is saved in buf. + */ +static char * +nvpair_name(const char *nvp) +{ + int len; + char *buf = NULL; + + /* skip over encode/decode size */ + nvp += 4 * 2; + len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + buf = grub_malloc(len + 1); + if (buf ) { + grub_memmove(buf, nvp + 4, len); + buf[len] = '\0'; + } + return buf; +} + +#if 0 +/* + * This function retrieves the value of the nvpair in the form of enumerated + * type data_type_t. 
+ */ +static int +nvpair_type (const char *nvp) +{ + int name_len, type; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + /* skip over name_len */ + name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + type = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + + return type; +} + +static int +nvpair_value (const char *nvp,char **val, + grub_size_t *size_out, grub_size_t *nelm_out) +{ + int name_len,nelm,encode_size; + + /* skip over encode/decode size */ + encode_size = grub_be_to_cpu32 (grub_get_unaligned32(nvp)); + nvp += 8; + + /* skip over name_len */ + name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + /* skip over type */ + nvp += 4; + nelm = grub_be_to_cpu32 (grub_get_unaligned32 (nvp)); + nvp +=4; + if (nelm < 1) + { + grub_error (GRUB_ERR_BAD_FS, "empty nvpair"); + return 0; + } + *val = (char *) nvp; + *size_out = encode_size; + if (nelm_out) + *nelm_out = nelm; + + return 1; +} +#endif + +static int +nvlist_find_value (const char *nvlist_in, const char *name, + int valtype, char **val, + grub_size_t *size_out, grub_size_t *nelm_out) +{ + int name_len, type, encode_size; + const char *nvpair, *nvp_name, *nvlist = nvlist_in; + + /* Verify if the 1st and 2nd byte in the nvlist are valid. */ + /* NOTE: independently of what endianness header announces all + subsequent values are big-endian. */ + if (nvlist[0] != NV_ENCODE_XDR || (nvlist[1] != NV_LITTLE_ENDIAN + && nvlist[1] != NV_BIG_ENDIAN)) + { + grub_dprintf ("zfs", "incorrect nvlist header\n"); + grub_error (GRUB_ERR_BAD_FS, "incorrect nvlist"); + return 0; + } + + /* skip the header, nvl_version, and nvl_nvflag */ + nvlist = nvlist + 4 * 3; + /* + * Loop thru the nvpair list + * The XDR representation of an integer is in big-endian byte order. 
+ */ + while ((encode_size = grub_be_to_cpu32 (grub_get_unaligned32 (nvlist)))) + { + int nelm; + + if (nvlist + 4 * 4 >= nvlist_in + VDEV_PHYS_SIZE) + { + grub_dprintf("zfs", "nvlist overflow\n"); + grub_error(GRUB_ERR_BAD_FS, "incorrect nvlist"); + return 0; + } + + nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ + + name_len = grub_be_to_cpu32 (grub_get_unaligned32 (nvpair)); + nvpair += 4; + + nvp_name = nvpair; + nvpair = nvpair + ((name_len + 3) & ~3); /* align */ + + if (nvpair + 8 >= nvlist_in + VDEV_PHYS_SIZE + || encode_size < 0 + || nvpair + 8 + encode_size > nvlist_in + VDEV_PHYS_SIZE) + { + grub_dprintf("zfs", "nvlist overflow\n"); + grub_error(GRUB_ERR_BAD_FS, "incorrect nvlist"); + return 0; + } + + type = grub_be_to_cpu32 (grub_get_unaligned32 (nvpair)); + nvpair += 4; + + nelm = grub_be_to_cpu32 (grub_get_unaligned32 (nvpair)); + if (nelm < 1) + return grub_error (GRUB_ERR_BAD_FS, "empty nvpair"); + + nvpair += 4; + + if ((grub_strncmp (nvp_name, name, name_len) == 0) && type == valtype) + { + *val = (char *) nvpair; + *size_out = encode_size; + if (nelm_out) + *nelm_out = nelm; + return 1; + } + + nvlist += encode_size; /* goto the next nvpair */ + } return 0; } @@ -3497,92 +4475,964 @@ } static void -unmount_device (struct grub_zfs_device_desc *desc) +mount_device (struct grub_zfs_device_desc *desc, grub_device_t dev, char *name) +{ + unsigned i; + switch (desc->type) + { + case DEVICE_LEAF: + if (desc->dev_name) + { + if (grub_strcmp(desc->dev_name, name) == 0) + { + grub_dprintf ("zfs", "mount_device: using device %s\n", name); + desc->dev = dev; + desc->original = 1; + } + else + { + desc->original = 0; /* just to be safe */ + grub_dprintf ("zfs", "mount_device: open device %s\n", name); + if (desc->dev == NULL) + desc->dev = grub_device_open (desc->dev_name); + } + } + return; + case DEVICE_RAIDZ: + case DEVICE_MIRROR: + for (i = 0; i < desc->n_children; i++) + mount_device (&desc->children[i], dev, name); + return; + } +} + 
+static void +unmount_device (struct grub_zfs_device_desc *desc, int zcached) { unsigned i; + + if (desc->config) + grub_free (desc->config); + switch (desc->type) { case DEVICE_LEAF: - if (!desc->original && desc->dev) - grub_device_close (desc->dev); + if (desc->dev) + { + /* + * make sure we dont close dev provided by caller of zfs_mount() + */ + if (!desc->original) + { + grub_dprintf ("zfs", "unmount_device: closing device %s\n", + desc->dev_name); + grub_device_close (desc->dev); + } + desc->dev = NULL; + } + desc->original = 0; /* next caller may use different device */ + if (! zcached) + grub_free (desc->dev_name); return; case DEVICE_RAIDZ: case DEVICE_MIRROR: for (i = 0; i < desc->n_children; i++) - unmount_device (&desc->children[i]); - grub_free (desc->children); + unmount_device (&desc->children[i], zcached); return; } -} +} + +static void +zfs_unmount (struct grub_zfs_data *data) +{ + unsigned i; + + grub_dprintf("zfs", "zfs_unmount: %p\n", data); + + if (data == NULL) + return; + + grub_free (data->label); + data->label = NULL; + grub_free (data->dnode_buf); + data->dnode_buf = NULL; + grub_free (data->dnode_mdn); + data->dnode_mdn = NULL; + grub_free (data->file_buf); + data->file_buf = NULL; + data->file_start = 0; + data->file_end = 0; + data->dnode_start = 0; + data->dnode_end = 0; + + for (i = 0; i < data->subvol.nkeys; i++) + grub_crypto_cipher_close (data->subvol.keyring[i].cipher); + grub_free (data->subvol.keyring); + data->subvol.nkeys = 0; + data->subvol.keyring = NULL; + + for (i = 0; i < data->n_devices_attached; i++) + { + unmount_device (&data->devices_attached[i], data->zcached); + if (! data->zcached) + grub_free (data->devices_attached[i].children); + } + if (! 
data->zcached) + { + grub_free (data->devices_attached); + grub_free (data); + } +} + +/* + * Free all the caches + */ +static void +zfs_free_caches (void) +{ + unsigned int i; + struct grub_zfs_data *data; + struct zfs_mount_cache *zcache, *zcache_next; + struct zfs_dev_notzfs *dev_notzfs, *dev_notzfs_next; + + /* free the zfs mount cache list */ + zcache = zfs_mount_cache_list; + while (zcache != NULL) + { + data = zcache->zcache_zfs_data; + if (data) + { + for (i = 0; i < data->n_devices_attached; i++) + { + unmount_device (&data->devices_attached[i], 0); + grub_free (data->devices_attached[i].children); + } + grub_free (data->devices_attached); + + for (i = 0; i < data->subvol.nkeys; i++) + grub_crypto_cipher_close (data->subvol.keyring[i].cipher); + grub_free (data->subvol.keyring); + + grub_free (data); + zcache->zcache_zfs_data = NULL; + } + + grub_free (zcache->zcache_pool_name); + zcache->zcache_pool_name = NULL; + zcache->zcache_pool_guid = 0; + + zcache_next = zcache->next; + grub_free (zcache); + zcache = zcache_next; + } + zfs_mount_cache_list = NULL; + + /* free the cache list for non-zfs disk dev */ + dev_notzfs = zfs_dev_notzfs_list; + while (dev_notzfs != NULL) + { + grub_free (dev_notzfs->dev_name); + dev_notzfs_next = dev_notzfs->next; + grub_free (dev_notzfs); + dev_notzfs = dev_notzfs_next; + } + zfs_dev_notzfs_list = NULL; +} + +static grub_off_t +vdev_label_offset(grub_uint64_t psize, int l, grub_uint64_t offset) +{ + return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 
+ 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); +} + +static grub_err_t +vdev_disk_read_rootlabel(grub_device_t dev, char **config) +{ + int vdevnum; + int l; + vdev_phys_t *vp; + grub_uint64_t s, size; + grub_err_t err; + + vdevnum = VDEV_LABELS; + + vp = grub_malloc (VDEV_PHYS_SIZE); + *config = NULL; + + if (!vp) + { + grub_dprintf("zfs", "vdev_phys_t allocation failed.\n"); + return grub_errno; + } + + size = s = grub_disk_get_size (dev->disk); + + /* Don't check back labels on CDROM. */ + if (s == GRUB_DISK_SIZE_UNKNOWN) + vdevnum = VDEV_LABELS / 2; + else + { + /* grub_disk_get_size() returns sector count */ + s = s << dev->disk->log_sector_size; + size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), grub_uint64_t); + } + + for (l = 0; l < vdevnum; l++) + { + grub_uint64_t state, txg; + grub_off_t offset; + char *nvl; + + offset = vdev_label_offset(size, l, 0); + offset += VDEV_SKIP_SIZE; /* pad1 */ + offset += VDEV_BOOT_HEADER_SIZE; /* boot header */ + + /* Read in the vdev_phys. */ + grub_dprintf ("zfs", "vdev_disk_read_rootlabel: label[%d] %" + PRIuGRUB_UINT64_T "\n", l, offset); + + err = grub_disk_read (dev->disk, 0, offset, + VDEV_PHYS_SIZE, (char *) vp); + if (err) + { + grub_errno = GRUB_ERR_NONE; + continue; + } + else + { + zio_cksum_t zc; + grub_zfs_endian_t endian; + + if (grub_zfs_to_cpu64 (vp->vp_zbt.zec_magic, + GRUB_ZFS_LITTLE_ENDIAN) == ZEC_MAGIC) + endian = GRUB_ZFS_LITTLE_ENDIAN; + else if (grub_zfs_to_cpu64 (vp->vp_zbt.zec_magic, + GRUB_ZFS_BIG_ENDIAN) == ZEC_MAGIC) + endian = GRUB_ZFS_BIG_ENDIAN; + else + continue; + + ZIO_SET_CHECKSUM (&zc, offset, 0, 0, 0); + err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian, + (char *)vp, VDEV_PHYS_SIZE); + if (err) + { + grub_errno = GRUB_ERR_NONE; + continue; + } + } + + nvl = vp->vp_nvlist; + if (!grub_zfs_nvlist_lookup_uint64 (nvl, ZPOOL_CONFIG_POOL_STATE, + &state) && state >= POOL_STATE_DESTROYED) + { + continue; + } + + if (!grub_zfs_nvlist_lookup_uint64 (nvl, ZPOOL_CONFIG_POOL_TXG, + 
&txg) && txg == 0) + { + continue; + } + + *config = grub_malloc(VDEV_PHYS_SIZE - sizeof (zio_eck_t)); + if (!*config) + { + grub_dprintf("zfs", "nvlist allocation failed.\n"); + grub_free(vp); + return grub_errno; + } + grub_memcpy(*config, nvl, VDEV_PHYS_SIZE - sizeof (zio_eck_t)); + break; + } + + grub_free(vp); + if (*config == NULL) + err = GRUB_ERR_BAD_FS; + + return err; +} + +/* add device to grub_zfs_device_desc */ +static grub_err_t +vdev_add(struct grub_zfs_device_desc *desc, grub_device_t dev, char *nv, + char *dev_name, int original) +{ + char *type; + grub_uint64_t u; + + type = grub_zfs_nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE); + + if (grub_strcmp (type, VDEV_TYPE_MIRROR) == 0) + desc->type = DEVICE_MIRROR; + else if (grub_strcmp (type, VDEV_TYPE_RAIDZ) == 0) + desc->type = DEVICE_RAIDZ; + else if (grub_strcmp (type, VDEV_TYPE_DISK) == 0 || + grub_strcmp (type, VDEV_TYPE_FILE) == 0) + desc->type = DEVICE_LEAF; + else + { + grub_dprintf("zfs", "vdev_add: vdev %d unknown type %s\n", + (int)desc->id, type); + grub_free (type); + return GRUB_ERR_BAD_FS; + } + grub_free (type); + + if (desc->type > DEVICE_LEAF) + { + unsigned i; + desc->n_children = grub_zfs_nvlist_lookup_nvlist_array_get_nelm + (nv, ZPOOL_CONFIG_CHILDREN); + desc->children = grub_zalloc (sizeof (desc->children[0]) + * desc->n_children); + if (! 
desc->children)
+        {
+          grub_dprintf("zfs", "vdev_add: out of memory\n");
+          return GRUB_ERR_OUT_OF_MEMORY;
+        }
+      /* set children state to DEVICE_ERROR */
+      for (i = 0; i < desc->n_children; i++)
+        desc->children[i].dev_state = DEVICE_ERROR;
+    }
+  else
+    {
+      desc->n_children = 0;
+      if (grub_zfs_vdev_validate(nv) || dev == NULL)
+        desc->dev_state = DEVICE_ERROR;
+      else
+        desc->dev_state = DEVICE_OK;
+      desc->dev = dev;
+      desc->dev_name = dev_name;
+      desc->original = original;
+      desc->ashift = dev->disk->log_sector_size;
+      grub_dprintf("zfs", "vdev_add: added disk %s, state: %d original: %d\n",
+                   desc->dev_name, desc->dev_state, desc->original);
+    }
+
+  if (desc->type == DEVICE_RAIDZ)
+    {
+      if (!grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_NPARITY, &u))
+        {
+          grub_dprintf("zfs", "vdev_add: can't get parity\n");
+          return GRUB_ERR_BAD_FS;
+        }
+      desc->nparity = u;
+    }
+
+  if (grub_zfs_nvlist_lookup_uint64 (nv, ZPOOL_CONFIG_ASHIFT, &u))
+    desc->ashift = u;
+
+  grub_dprintf("zfs", "vdev_add: ashift: %d\n", desc->ashift);
+
+  return GRUB_ERR_NONE;
+}
+
+/*
+ * Attach device to vdev tree.
+ *
+ * config is the label nvlist that belongs to dev.  The top-level vdev
+ * described by its ZPOOL_CONFIG_VDEV_TREE is kept in
+ * data->devices_attached[] at the index given by that tree's
+ * ZPOOL_CONFIG_ID; the array is grown when the index does not fit yet.
+ * A slot whose guid is still 0 is filled in through vdev_add(), otherwise
+ * its guid has to match the label's ZPOOL_CONFIG_TOP_GUID.
+ *
+ * For vdev types above DEVICE_LEAF the children array is grown to the
+ * number of children in the tree, and the child whose guid equals the
+ * label's own ZPOOL_CONFIG_GUID is set up through vdev_add() with dev,
+ * dev_name and original.  A child nvlist that carries a newer txg than the
+ * one stored replaces cdesc->config (and is then owned by the descriptor);
+ * every other child nvlist is freed before the next iteration.
+ *
+ * Returns GRUB_ERR_NONE on success, GRUB_ERR_BAD_FS when a required label
+ * field is missing or inconsistent with what is already attached,
+ * GRUB_ERR_OUT_OF_MEMORY when an array cannot be grown, or the error
+ * returned by vdev_add().
+ */
+static grub_err_t
+vdev_attach(struct grub_zfs_data *data, grub_device_t dev, char *config,
+            char *dev_name, int original)
+{
+  grub_uint64_t id, guid, txg, n_children = 0;
+  struct grub_zfs_device_desc *desc;
+  struct grub_zfs_device_desc *grown;
+  char *nvtop;
+  unsigned i;
+  grub_err_t err = GRUB_ERR_NONE;
+
+  if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_POOL_TXG, &txg))
+    {
+      grub_dprintf ("zfs", "can't read vdev txg\n");
+      return GRUB_ERR_BAD_FS;
+    }
+  if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_TOP_GUID, &guid))
+    {
+      grub_dprintf ("zfs", "vdev_attach: can't read top guid\n");
+      return GRUB_ERR_BAD_FS;
+    }
+
+  nvtop = grub_zfs_nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+  if (!nvtop)
+    {
+      grub_dprintf("zfs", "read vdev tree failed\n");
+      return GRUB_ERR_BAD_FS;
+    }
+
+  if (!grub_zfs_nvlist_lookup_uint64 (nvtop, ZPOOL_CONFIG_ID, &id))
+    {
+      grub_dprintf ("zfs", "can't read vdev id\n");
+      grub_free (nvtop);
+      return GRUB_ERR_BAD_FS;
+    }
+  grub_dprintf ("zfs", "vdev id: %" PRIxGRUB_UINT64_T " guid: %"
+                PRIxGRUB_UINT64_T "\n", id, guid);
+
+  /*
+   * id is used as an array index, so the array must hold id + 1 entries;
+   * id == n_devices_allocated is already out of range.
+   */
+  if (id >= data->n_devices_allocated)
+    {
+      /*
+       * NOTE(review): children store top_vdev pointers into this array;
+       * growing it may move the array - confirm that no child of an
+       * earlier top-level vdev can be attached before this point.
+       */
+      grown = grub_realloc (data->devices_attached,
+                            sizeof (data->devices_attached[0]) * (id + 1));
+      if (!grown)
+        {
+          /* the old array is still valid and stays owned by data */
+          grub_dprintf ("zfs", "vdev_attach: out of memory\n");
+          grub_free (nvtop);
+          return GRUB_ERR_OUT_OF_MEMORY;
+        }
+      data->devices_attached = grown;
+      for (i = data->n_devices_allocated; i < (id + 1); i++)
+        grub_memset(&data->devices_attached[i], 0,
+                    sizeof (data->devices_attached[0]));
+      data->n_devices_allocated = id + 1;
+    }
+
+  desc = &data->devices_attached[id];
+
+  /* is it new vdev? */
+  if (desc->guid == 0)
+    {
+      grub_dprintf ("zfs", "adding vdev: %d guid: %"
+                    PRIxGRUB_UINT64_T "\n", (int)id, guid);
+      desc->id = id;
+      desc->guid = guid;
+      desc->txg = txg;
+      err = vdev_add(desc, dev, nvtop, dev_name, original);
+      if (err)
+        {
+          grub_dprintf ("zfs", "vdev_add failed\n");
+          grub_free (nvtop);
+          return err;
+        }
+
+      data->n_devices_attached++;
+    }
+  else
+    {
+      /* this vdev is set up, so the top guid must match with vdev guid */
+      if (desc->guid != guid)
+        {
+          grub_dprintf ("zfs", "vdev_attach: the top vdev guid mismatch\n");
+          grub_free (nvtop);
+          return GRUB_ERR_BAD_FS;
+        }
+    }
+
+  /* add us in child list if needed; from here on guid is the label's own */
+  if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_GUID, &guid))
+    {
+      grub_dprintf ("zfs", "vdev_attach: can't read guid\n");
+      grub_free (nvtop);
+      return GRUB_ERR_BAD_FS;
+    }
+
+  if (desc->type > DEVICE_LEAF)
+    {
+      n_children = grub_zfs_nvlist_lookup_nvlist_array_get_nelm
+        (nvtop, ZPOOL_CONFIG_CHILDREN);
+      if (n_children > desc->n_children)
+        {
+          grown = grub_realloc (desc->children,
+                                sizeof (desc->children[0]) * n_children);
+          if (!grown)
+            {
+              grub_dprintf ("zfs", "vdev_attach: out of memory\n");
+              grub_free (nvtop);
+              return GRUB_ERR_OUT_OF_MEMORY;
+            }
+          desc->children = grown;
+          for (i = desc->n_children; i < n_children; i++)
+            grub_memset(&desc->children[i], 0, sizeof (desc->children[0]));
+          desc->n_children = n_children;
+        }
+    }
+
+  for (i = 0; i < desc->n_children; i++)
+    {
+      grub_uint64_t cid, cguid;
+      struct grub_zfs_device_desc *cdesc;
+      char *child;
+
+      child = grub_zfs_nvlist_lookup_nvlist_array(nvtop,
+                                                  ZPOOL_CONFIG_CHILDREN, i);
+      if (!child)
+        {
+          /* this label lists fewer children than are already known */
+          grub_dprintf ("zfs", "vdev_attach: can't read child\n");
+          grub_free (nvtop);
+          return GRUB_ERR_BAD_FS;
+        }
+
+      if (!grub_zfs_nvlist_lookup_uint64 (child, ZPOOL_CONFIG_ID, &cid))
+        {
+          grub_dprintf ("zfs", "vdev_attach: can't read id\n");
+          grub_free (child);
+          grub_free (nvtop);
+          return GRUB_ERR_BAD_FS;
+        }
+
+      if (!grub_zfs_nvlist_lookup_uint64 (child, ZPOOL_CONFIG_GUID, &cguid))
+        {
+          grub_dprintf ("zfs", "vdev_attach: can't read guid\n");
+          grub_free (child);
+          grub_free (nvtop);
+          return GRUB_ERR_BAD_FS;
+        }
+
+      /* the id comes from the label; never index past the children array */
+      if (cid >= desc->n_children)
+        {
+          grub_dprintf ("zfs", "vdev_attach: child id out of range\n");
+          grub_free (child);
+          grub_free (nvtop);
+          return GRUB_ERR_BAD_FS;
+        }
+
+      cdesc = &desc->children[cid];
+      if (cguid == guid && i == cid && cdesc->guid == 0)
+        {
+          grub_dprintf ("zfs", "adding child: %d guid: %"
+                        PRIxGRUB_UINT64_T "\n", i, guid);
+
+          cdesc->id = cid;
+          cdesc->guid = guid;
+          if (cdesc->txg < txg)
+            {
+              cdesc->txg = txg;
+              grub_free (cdesc->config);
+              cdesc->config = child;
+            }
+
+          err = vdev_add(cdesc, dev, child, dev_name, original);
+          if (err)
+            {
+              grub_dprintf ("zfs", "vdev_add failed\n");
+              /* child is freed below, do not leave it behind in cdesc */
+              if (cdesc->config == child)
+                cdesc->config = NULL;
+              grub_free (child);
+              grub_free (nvtop);
+              return err;
+            }
+          cdesc->top_vdev = desc;
+          if (desc->ashift < cdesc->ashift)
+            desc->ashift = cdesc->ashift;
+        }
+      /* update child dev_state if needed */
+      if (cdesc->txg < txg)
+        {
+          cdesc->txg = txg;
+          grub_free (cdesc->config);
+          cdesc->config = child;
+        }
+      if (cdesc->dev == NULL)
+        cdesc->dev_state = DEVICE_ERROR;
+      else if (grub_zfs_vdev_validate(cdesc->config))
+        cdesc->dev_state = DEVICE_ERROR;
+      /* the nvlist is kept only when it became the child's config */
+      if (cdesc->config != child)
+        grub_free (child);
+    }
+
+  grub_free (nvtop);
+  return err;
+}
+
+/*
+ * read 'best' uberblock from disk. so we read UB array from each label
+ * from each vdev and looking for highest txg.
+ *
+ * Every uberblock slot of every label on desc's device is read; a slot
+ * that cannot be read or that fails uberblock_verify() is skipped.  A
+ * surviving candidate is copied into data->current_uberblock when nothing
+ * is stored there yet (ub_magic == 0) or when vdev_uberblock_compare()
+ * returns a positive value.  Nothing is done for a vdev that has no
+ * device, and read errors are not reported to the caller.
+ */
+static void
+vdev_uberblock_load(struct grub_zfs_data *data,
+                    struct grub_zfs_device_desc *desc)
+{
+  int vdevnum, l, ashift;
+  grub_uint64_t s, size;
+  grub_uint8_t *ub_array;
+  grub_err_t err;
+
+  if (desc->dev == NULL)
+    return;
+
+  /* a child of a mirror/raidz uses the ashift of its top-level vdev */
+  if (desc->top_vdev)
+    ashift = desc->top_vdev->ashift;
+  else
+    ashift = desc->ashift;
+
+  grub_dprintf("zfs", "vdev_uberblock_load: %s ashift: %d\n",
+               desc->dev_name, ashift);
+  vdevnum = VDEV_LABELS;
+
+  ub_array = grub_zalloc (VDEV_UBERBLOCK_SIZE(ashift));
+  if (!ub_array)
+    {
+      grub_dprintf("zfs", "ub_array allocation failed.\n");
+      return;
+    }
+
+  size = s = grub_disk_get_size (desc->dev->disk);
+  /* Don't check back labels on CDROM. */
+  if (s == GRUB_DISK_SIZE_UNKNOWN)
+    vdevnum = VDEV_LABELS / 2;
+  else
+    {
+      /*
+       * grub_disk_get_size() returns a count of 512-byte sectors
+       * (1 << GRUB_DISK_SECTOR_BITS), independent of the native sector
+       * size of the disk, so convert to bytes with that shift and not
+       * with log_sector_size.
+       */
+      s = s << GRUB_DISK_SECTOR_BITS;
+      size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), grub_uint64_t);
+    }
+
+  for (l = 0; l < vdevnum; l++)
+    {
+      grub_off_t offset;
+      int n;
+
+      for (n = 0; n < VDEV_UBERBLOCK_COUNT(ashift); n++)
+        {
+          offset = vdev_label_offset(size, l, VDEV_UBERBLOCK_OFFSET(ashift, n));
+          err = grub_disk_read (desc->dev->disk, 0, offset,
+                                VDEV_UBERBLOCK_SIZE(ashift), (char *)ub_array);
+          if (err)
+            {
+              /* an unreadable slot is not fatal, try the next one */
+              grub_dprintf ("zfs", "grub_disk_read() failed\n");
+              grub_errno = GRUB_ERR_NONE;
+              continue;
+            }
+          err = uberblock_verify ((uberblock_t *)ub_array, offset, ashift);
+          if (err)
+            {
+              grub_errno = GRUB_ERR_NONE;
+              continue;
+            }
+          /*
+           * NOTE(review): the stored uberblock is the first argument here;
+           * confirm that a positive result of vdev_uberblock_compare()
+           * means the candidate in ub_array is the better one.
+           */
+          if (data->current_uberblock.ub_magic == 0 ||
+              vdev_uberblock_compare(&data->current_uberblock,
+                                     (uberblock_t *)ub_array) > 0)
+            {
+              grub_memmove(&(data->current_uberblock), ub_array,
+                           sizeof (uberblock_t));
+            }
+        }
+    }
+  grub_free (ub_array);
+}
+
+/*
+ * zfs_mount() locates a valid uberblock of the root pool and
+ * read in its MOS to the device grub_zfs_data structure.
+ * + * zfs_mount() is called frequently by GRUB2 zfs interfaces, so + * a cache mechanism is implemented to save the successful zfs + * mount data for devices. This is good for zfs mount performance. + * + */ +static struct grub_zfs_data * +zfs_mount (grub_device_t dev) +{ + struct grub_zfs_data *data = NULL; + struct zfs_mount_cache *zcache = NULL; + objset_phys_t *osp = 0; + grub_size_t ospsize; + grub_zfs_endian_t ub_endian = GRUB_ZFS_UNKNOWN_ENDIAN; + uberblock_t *ub; + unsigned int i, j; + char *diskname, *partname, *fullname, *pname; + char *features, *config = NULL; + grub_uint64_t u; + grub_err_t err; + + grub_errno = GRUB_ERR_NONE; + if (! dev->disk) + { + grub_error (GRUB_ERR_BAD_DEVICE, "not a disk"); + grub_errno = GRUB_ERR_BAD_DEVICE; + return NULL; + } + + diskname = (char *)dev->disk->name; + partname = grub_partition_get_name(dev->disk->partition); + if (partname[0] != 0) + { + fullname = grub_xasprintf("%s,%s", diskname, partname); + } + else + fullname = grub_strdup (diskname); + grub_free (partname); + + if ((err = vdev_disk_read_rootlabel(dev, &config)) != GRUB_ERR_NONE) + { + grub_error (err, "Cannot read the pool label from '%s'", fullname); + grub_free (fullname); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = err; + return NULL; + } + + pname = grub_zfs_nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); + if (!pname) + { + grub_error (err, "Incorrect pool label from '%s'", fullname); + grub_free (fullname); + grub_free (config); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + + if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_VERSION, &u)) + { + grub_free (fullname); + grub_free (config); + grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VERSION " not found"); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + + return NULL; + } + + if (!SPA_VERSION_IS_SUPPORTED(u)) + { + grub_free (fullname); + grub_free (config); + + grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET, + "ZFS SPA 
version %" PRIuGRUB_UINT64_T "is not compatible with this " + "GRUB2 ZFS reader code.\nSupported Oracle ZFS is %" PRIuGRUB_UINT64_T + "\nSupported OpenZFS version is %" PRIuGRUB_UINT64_T "\n", + (unsigned long long) u, + (unsigned long long) SPA_VERSION_ORACLE, + (unsigned long long) SPA_VERSION); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_NOT_IMPLEMENTED_YET; + + return NULL; + } + + features = grub_zfs_nvlist_lookup_nvlist (config, + ZPOOL_CONFIG_FEATURES_FOR_READ); + if (features) + { + const char *nvp; + char *name; + + for (nvp = nvlist_next_nvpair(features, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(features, nvp)) { + name = nvpair_name(nvp); + if (name) { + err = check_feature(name, 1, NULL); + if (err) { + grub_free (name); + grub_free (features); + grub_free (fullname); + grub_free (config); + grub_errno = err; + return NULL; + } + grub_free (name); + } + } + grub_free (features); + } + grub_dprintf ("zfs", "zfs_mount: attempting to mount \"%s\" from %s\n", + pname, fullname); + + /* check the zfs mount cache list first */ + for (zcache = zfs_mount_cache_list; zcache != NULL; zcache = zcache->next) + { + grub_dprintf ("zfs", "zfs_mount: zcached %s\n", zcache->zcache_pool_name); + if (grub_strcmp (pname, zcache->zcache_pool_name) != 0) + continue; + + data = zcache->zcache_zfs_data; + data->file_buf = 0; + data->file_start = 0; + data->file_end = 0; + data->dnode_buf = 0; + data->dnode_mdn = 0; + data->dnode_start = 0; + data->dnode_end = 0; + data->dnode_endian = 0; + data->label = pname; + grub_memset(&data->dnode, 0, sizeof (data->dnode)); + grub_memset(&data->subvol, 0, sizeof (data->subvol)); + + grub_free (config); + + for (i = 0; i < data->n_devices_attached; i++) + { + mount_device (&data->devices_attached[i], dev, fullname); + } + grub_free (fullname); + /* need to check if dev is actually usable */ + return data; + } + + /* allocate the zfs mount data structure */ + data = grub_zalloc (sizeof (*data)); + if (! 
data) + { + grub_error (GRUB_ERR_OUT_OF_MEMORY, "zfs data allocation failed"); + grub_free (pname); + grub_free (config); + grub_free (fullname); + grub_errno = GRUB_ERR_OUT_OF_MEMORY; + return 0; + } + + /* need to do a new mount */ + data->label = pname; + + if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_POOL_GUID, &u)) + { + grub_dprintf ("zfs", "can't read pool guid\n"); + grub_free (config); + zfs_unmount (data); + grub_free (fullname); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + data->guid = u; + grub_dprintf ("zfs", "pool guid: %" PRIxGRUB_UINT64_T "\n", u); + + if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_POOL_STATE, &u)) + { + grub_dprintf ("zfs", "can't read pool state\n"); + grub_free (config); + zfs_unmount (data); + grub_free (fullname); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + data->state = u; + + if (!grub_zfs_nvlist_lookup_uint64 (config, ZPOOL_CONFIG_VDEV_CHILDREN, &u)) + { + grub_dprintf ("zfs", "can't read vdev children count\n"); + grub_free (config); + grub_free (fullname); + zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + + /* set up memory for top level vdevs */ + data->n_devices_allocated = u; + data->devices_attached = grub_zalloc (sizeof (data->devices_attached[0]) + * data->n_devices_allocated); + if (! data->devices_attached) + { + grub_dprintf ("zfs", "can't allocate top level vdevs\n"); + grub_free (config); + grub_free (fullname); + zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + + data->n_devices_attached = 0; + + /* + * check if the pool has multiple vdevs/children, add the disks. + * as the zfs_mount() caller provides us open "dev" and will close it, + * we dont wanna store this descriptor, but instead we open new instance. 
+ */ + err = scan_devices(data, fullname, dev); + if (err) + { + /* we are missing disks, check if we have enough parity to continue */ + if (data->n_devices_attached == data->n_devices_allocated) + { + unsigned missing = 0; + struct grub_zfs_device_desc *desc; + /* + * only need to check raidz, as missing leaf can't get here + * and mirror means we have at least 1 disk in mirror + */ + err = GRUB_ERR_NONE; + for (i = 0; i < data->n_devices_attached; i++) + { + desc = &data->devices_attached[i]; + if (desc->type == DEVICE_RAIDZ) + { + for (j = 0; j < desc->n_children; j++) + if (desc->children[j].dev == NULL) + missing++; + if (missing > desc->nparity) + err = grub_error (GRUB_ERR_BAD_FS, + N_("pool %s is missing too many devices"), + data->label); + } + } + } + } -static void -zfs_unmount (struct grub_zfs_data *data) -{ - unsigned i; + /* if dev_state is DEVICE_ERROR, we need to use another device for mount */ for (i = 0; i < data->n_devices_attached; i++) - unmount_device (&data->devices_attached[i]); - grub_free (data->devices_attached); - grub_free (data->dnode_buf); - grub_free (data->dnode_mdn); - grub_free (data->file_buf); - for (i = 0; i < data->subvol.nkeys; i++) - grub_crypto_cipher_close (data->subvol.keyring[i].cipher); - grub_free (data->subvol.keyring); - grub_free (data); -} + { + struct grub_zfs_device_desc *desc; + desc = &data->devices_attached[i]; + if (err) + break; -/* - * zfs_mount() locates a valid uberblock of the root pool and read in its MOS - * to the memory address MOS. 
- * - */ -static struct grub_zfs_data * -zfs_mount (grub_device_t dev) -{ - struct grub_zfs_data *data = 0; - grub_err_t err; - void *osp = 0; - grub_size_t ospsize; - grub_zfs_endian_t ub_endian = GRUB_ZFS_UNKNOWN_ENDIAN; - uberblock_t *ub; - int inserted; + if (desc->type == DEVICE_LEAF) + { + if (desc->original && desc->dev_state == DEVICE_ERROR) + { + err = GRUB_ERR_BAD_DEVICE; + break; + } + } + for (j = 0; j < desc->n_children; j++) + { + if (desc->children[j].original && + desc->children[j].dev_state == DEVICE_ERROR) + { + err = GRUB_ERR_BAD_DEVICE; + break; + } + } + } - if (! dev->disk) + if (err) { - grub_error (GRUB_ERR_BAD_DEVICE, "not a disk"); - return 0; + grub_errno = err; + grub_dprintf("zfs", "zfs_mount failed: missing devices\n"); + grub_free (config); + zfs_unmount (data); + return NULL; } - data = grub_zalloc (sizeof (*data)); - if (!data) - return 0; -#if 0 - /* if it's our first time here, zero the best uberblock out */ - if (data->best_drive == 0 && data->best_part == 0 && find_best_root) - grub_memset (¤t_uberblock, 0, sizeof (uberblock_t)); -#endif + /* + * load the UB now. the obvious issue here is that we should load + * an config based on this UB, so instead we just hope the pool + * current config is ok. from bootloader point of view, it just may + * be "good enough", as we will output the disk with best ub with + * "search" command to be set for root, and so subsequent pool access + * will load the best config. same for "zfs-bootfs", to inform kernel + * to start from best disk - even if the kernel is able to deal better + * with pool config, there are cases where kernel will drop out and + * asks to use another disk to boot from... 
+ */ + grub_dprintf("zfs", "pool: %s\n", data->label); + for (i = 0; i < data->n_devices_attached; i++) + if (data->devices_attached[i].type == DEVICE_LEAF) + { + grub_dprintf("zfs", " %s\n", + data->devices_attached[i].dev_name); + vdev_uberblock_load(data, &data->devices_attached[i]); + } + else + { + grub_dprintf("zfs", " %s-%u\n", + data->devices_attached[i].type == DEVICE_MIRROR? + "mirror" : "raidz", i); + for (j = 0; j < data->devices_attached[i].n_children; j++) + { + struct grub_zfs_device_desc *desc; + desc = &data->devices_attached[i].children[j]; + grub_dprintf("zfs", " %s\n", desc->dev_name == NULL? + "[missing]" : desc->dev_name); + vdev_uberblock_load(data, desc); + } + } - data->n_devices_allocated = 16; - data->devices_attached = grub_malloc (sizeof (data->devices_attached[0]) - * data->n_devices_allocated); - data->n_devices_attached = 0; - err = scan_disk (dev, data, 1, &inserted); - if (err) + grub_free (config); + + ub = &(data->current_uberblock); + + /* did we got UB from any of the disks? */ + if (ub->ub_magic == 0) { + grub_dprintf("zfs", "we have no uberblock, can not continue\n"); zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; return NULL; } - ub = &(data->current_uberblock); ub_endian = (grub_zfs_to_cpu64 (ub->ub_magic, GRUB_ZFS_LITTLE_ENDIAN) == UBERBLOCK_MAGIC ? 
GRUB_ZFS_LITTLE_ENDIAN : GRUB_ZFS_BIG_ENDIAN); err = zio_read (&ub->ub_rootbp, ub_endian, - &osp, &ospsize, data); + (void **) &osp, &ospsize, data); if (err) { + grub_dprintf("zfs", "zio_read failed: %s\n", grub_errmsg ); zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = err; return NULL; } @@ -3591,15 +5441,12 @@ grub_error (GRUB_ERR_BAD_FS, "OSP too small"); grub_free (osp); zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; return NULL; } - if (ub->ub_version >= SPA_VERSION_FEATURES && - check_mos_features(&((objset_phys_t *) osp)->os_meta_dnode,ub_endian, - data) != 0) - return NULL; - - /* Got the MOS. Save it at the memory addr MOS. */ + /* Got the MOS. Save it to data->mos. */ grub_memmove (&(data->mos.dn), &((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE); data->mos.endian = (grub_zfs_to_cpu64 (ub->ub_rootbp.blk_prop, @@ -3608,6 +5455,42 @@ data->mounted = 1; + if(ub->ub_version >= SPA_VERSION_FEATURES && + check_mos_features(&data->mos, data) != 0) + { + zfs_unmount (data); + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_BAD_FS; + return NULL; + } + + data->mounted = 1; /* mount succeeded */ +#if 0 + /* + * zfs_mount cache is currently disabled, as it was causing + * artefacts with gfx menu - so there must be some bugs in it. 
+ */ + if (zcache == NULL) /* allocate the zfs mount cache structure */ + zcache = grub_zalloc (sizeof (*zcache)); + else + zcache = NULL; /* intend for a non-cached mount */ +#endif + + if (zcache != NULL) /* cache this OK zfs mount */ + { + zcache->zcache_zfs_data = data; + zcache->zcache_pool_guid = data->guid; + zcache->zcache_pool_name = grub_strdup(data->label); + zcache->next = zfs_mount_cache_list; + if (zcache->zcache_pool_name) + { + zfs_mount_cache_list = zcache; + data->zcached = 1; + } + else + grub_free(zcache); + } + return data; } @@ -3615,40 +5498,72 @@ grub_zfs_fetch_nvlist (grub_device_t dev, char **nvlist) { struct grub_zfs_data *zfs; - grub_err_t err; + grub_err_t err = GRUB_ERR_NONE; + unsigned i, j; + char *diskname, *partname, *fullname; zfs = zfs_mount (dev); if (!zfs) return grub_errno; - err = zfs_fetch_nvlist (zfs->device_original, nvlist); + + diskname = (char *)dev->disk->name; + partname = grub_partition_get_name(dev->disk->partition); + if (partname[0] != 0) + { + fullname = grub_xasprintf("%s,%s", diskname, partname); + } + else + fullname = grub_strdup (diskname); + grub_free (partname); + + for (i = 0; i < zfs->n_devices_attached; i++) + { + struct grub_zfs_device_desc *desc; + desc = &zfs->devices_attached[i]; + if (desc->type == DEVICE_LEAF) + { + if (grub_strcmp(fullname, desc->dev_name) == 0) + err = vdev_disk_read_rootlabel (desc->dev, nvlist); + } + else + { + for (j = 0; j < desc->n_children; j++) + if (grub_strcmp(fullname, desc->children[j].dev_name) == 0) + err = vdev_disk_read_rootlabel (desc->children[j].dev, nvlist); + } + } + + if (err) + grub_dprintf("zfs", "zfs_fetch_nvlist failed: device:%s\n", fullname); + grub_free (fullname); zfs_unmount (zfs); return err; } +/* + * Returns the pool name. + */ static grub_err_t zfs_label (grub_device_t device, char **label) { - char *nvlist; - grub_err_t err; struct grub_zfs_data *data; + *label = NULL; + data = zfs_mount (device); if (! 
data) return grub_errno; - err = zfs_fetch_nvlist (data->device_original, &nvlist); - if (err) - { - zfs_unmount (data); - return err; - } - - *label = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_POOL_NAME); - grub_free (nvlist); + *label = grub_strdup(data->label); + if (*label == NULL) + grub_errno = GRUB_ERR_OUT_OF_MEMORY; zfs_unmount (data); return grub_errno; } +/* + * Returns the pool GUID. + */ static grub_err_t zfs_uuid (grub_device_t device, char **uuid) { @@ -3663,7 +5578,11 @@ *uuid = grub_xasprintf ("%016llx", (long long unsigned) data->guid); zfs_unmount (data); if (! *uuid) - return grub_errno; + { + if (grub_errno == GRUB_ERR_NONE) + grub_errno = GRUB_ERR_OUT_OF_MEMORY; + return grub_errno; + } return GRUB_ERR_NONE; } @@ -3692,7 +5611,8 @@ /* * zfs_open() locates a file in the rootpool by following the - * MOS and places the dnode of the file in the memory address DNODE. + * MOS and places the dnode of the file in the device + * grub_zfs_data structure. */ static grub_err_t grub_zfs_open (struct grub_file *file, const char *fsfilename) @@ -3815,9 +5735,9 @@ grub_uint64_t blkid = grub_divmod64 (file->offset + read, blksz, 0); grub_free (data->file_buf); data->file_buf = 0; + data->file_start = data->file_end = 0; - err = dmu_read (&(data->dnode), blkid, &t, - 0, data); + err = dmu_read (&(data->dnode), blkid, &t, 0, data); data->file_buf = t; if (err) { @@ -3874,6 +5794,42 @@ return err; } +grub_err_t +grub_zfs_defaultbootfsobj (grub_device_t dev, grub_uint64_t *mdnobj) +{ + struct grub_zfs_data *data; + grub_err_t err; + + data = zfs_mount (dev); + if (! data) + return grub_errno; + + err = get_default_bootfsobj (&(data->mos), mdnobj, data); + zfs_unmount (data); + + return err; +} + +grub_err_t +grub_zfs_defaultbootfsname (grub_device_t dev, char **bootfsname) +{ + grub_uint64_t mdnobj; + struct grub_zfs_data *data; + grub_err_t err; + + data = zfs_mount (dev); + if (! 
data) + return grub_errno; + + err = get_default_bootfsobj (&(data->mos), &mdnobj, data); + if (! err) + err = get_default_bootfsname(&(data->mos), mdnobj, data, &(*bootfsname)); + + zfs_unmount (data); + + return err; +} + static grub_err_t fill_fs_info (struct grub_dirhook_info *info, dnode_end_t mdn, struct grub_zfs_data *data) @@ -3984,247 +5940,185 @@ { blkptr_t *bp = &dn.dn.dn_spill; - err = zio_read (bp, dn.endian, &sahdrp, NULL, ctx->data); + err = zio_read(bp, dn.endian, &sahdrp, NULL, ctx->data); if (err) { - grub_print_error (); + grub_print_error(); return 0; } } else - { - grub_error (GRUB_ERR_BAD_FS, "filesystem is corrupt"); - grub_print_error (); + { + grub_error(GRUB_ERR_BAD_FS, "filesystem is corrupt"); + grub_print_error(); return 0; } hdrsize = SA_HDR_SIZE (((sa_hdr_phys_t *) sahdrp)); info.mtimeset = 1; - info.mtime = grub_zfs_to_cpu64 (grub_get_unaligned64 ((char *) sahdrp + hdrsize + SA_MTIME_OFFSET), dn.endian); + info.mtime = grub_zfs_to_cpu64(grub_get_unaligned64((char *) sahdrp + hdrsize + SA_MTIME_OFFSET), dn.endian); info.case_insensitive = ctx->data->subvol.case_insensitive; } - + if (dn.dn.dn_bonustype == DMU_OT_ZNODE) { info.mtimeset = 1; - info.mtime = grub_zfs_to_cpu64 (((znode_phys_t *) DN_BONUS (&dn.dn))->zp_mtime[0], - dn.endian); + info.mtime = grub_zfs_to_cpu64(((znode_phys_t *) DN_BONUS (&dn.dn))->zp_mtime[0], + dn.endian); } info.dir = (dn.dn.dn_type == DMU_OT_DIRECTORY_CONTENTS); grub_dprintf ("zfs", "type=%d, name=%s\n", (int)dn.dn.dn_type, (char *)name); - return ctx->hook (name, &info, ctx->hook_data); + return ctx->hook(name, &info, ctx->hook_data); } /* Helper for grub_zfs_dir. 
*/ static int -iterate_zap_fs (const char *name, grub_uint64_t val, - struct grub_zfs_dir_ctx *ctx) +iterate_zap_fs(const char *name, grub_uint64_t val, + struct grub_zfs_dir_ctx *ctx) { - grub_err_t err; - struct grub_dirhook_info info; - - dnode_end_t mdn; - err = dnode_get (&(ctx->data->mos), val, 0, &mdn, ctx->data); - if (err) - { - grub_errno = 0; - return 0; - } - if (mdn.dn.dn_type != DMU_OT_DSL_DIR) - return 0; + grub_err_t err; + struct grub_dirhook_info info; + dnode_end_t mdn; + err = dnode_get (&(ctx->data->mos), val, 0, &mdn, ctx->data); + if (err) { + grub_errno = 0; + return 0; + } + if (mdn.dn.dn_type != DMU_OT_DSL_DIR) + return 0; + + err = fill_fs_info (&info, mdn, ctx->data); + if (err) { + grub_errno = 0; + return 0; + } + return ctx->hook (name, &info, ctx->hook_data); - err = fill_fs_info (&info, mdn, ctx->data); - if (err) - { - grub_errno = 0; - return 0; - } - return ctx->hook (name, &info, ctx->hook_data); } /* Helper for grub_zfs_dir. */ static int -iterate_zap_snap (const char *name, grub_uint64_t val, - struct grub_zfs_dir_ctx *ctx) +iterate_zap_snap(const char *name, grub_uint64_t val, + struct grub_zfs_dir_ctx *ctx) { - grub_err_t err; - struct grub_dirhook_info info; - char *name2; - int ret; - - dnode_end_t mdn; - - err = dnode_get (&(ctx->data->mos), val, 0, &mdn, ctx->data); - if (err) - { - grub_errno = 0; - return 0; - } + grub_err_t err; + struct grub_dirhook_info info; + char *name2; + int ret; + dnode_end_t mdn; + + err = dnode_get(&(ctx->data->mos), val, 0, &mdn, ctx->data); + if (err) { + grub_errno = 0; + return 0; + } + + if (mdn.dn.dn_type != DMU_OT_DSL_DATASET) + return 0; + + err = fill_fs_info(&info, mdn, ctx->data); + if (err) { + grub_errno = 0; + return 0; + } + + name2 = grub_malloc(grub_strlen (name) + 2); + name2[0] = '@'; + grub_memcpy(name2 + 1, name, grub_strlen (name) + 1); + ret = ctx->hook(name2, &info, ctx->hook_data); + grub_free(name2); + return ret; - if (mdn.dn.dn_type != DMU_OT_DSL_DATASET) - return 
0; - - err = fill_fs_info (&info, mdn, ctx->data); - if (err) - { - grub_errno = 0; - return 0; - } - - name2 = grub_malloc (grub_strlen (name) + 2); - name2[0] = '@'; - grub_memcpy (name2 + 1, name, grub_strlen (name) + 1); - ret = ctx->hook (name2, &info, ctx->hook_data); - grub_free (name2); - return ret; } static grub_err_t grub_zfs_dir (grub_device_t device, const char *path, grub_fs_dir_hook_t hook, void *hook_data) { - struct grub_zfs_dir_ctx ctx = { - .hook = hook, - .hook_data = hook_data - }; - struct grub_zfs_data *data; - grub_err_t err; - int isfs; - - data = zfs_mount (device); - if (! data) - return grub_errno; - err = dnode_get_fullpath (path, &(data->subvol), &(data->dnode), &isfs, data); - if (err) - { - zfs_unmount (data); - return err; - } - ctx.data = data; - - if (isfs) - { - grub_uint64_t childobj, headobj; - grub_uint64_t snapobj; - dnode_end_t dn; - struct grub_dirhook_info info; - - err = fill_fs_info (&info, data->dnode, data); - if (err) - { - zfs_unmount (data); - return err; - } - if (hook ("@", &info, hook_data)) - { - zfs_unmount (data); - return GRUB_ERR_NONE; - } + struct grub_zfs_dir_ctx ctx = { + .hook = hook, + .hook_data = hook_data + }; + struct grub_zfs_data *data; + grub_err_t err; + int isfs; - childobj = grub_zfs_to_cpu64 (((dsl_dir_phys_t *) DN_BONUS (&data->dnode.dn))->dd_child_dir_zapobj, data->dnode.endian); - headobj = grub_zfs_to_cpu64 (((dsl_dir_phys_t *) DN_BONUS (&data->dnode.dn))->dd_head_dataset_obj, data->dnode.endian); - err = dnode_get (&(data->mos), childobj, - DMU_OT_DSL_DIR_CHILD_MAP, &dn, data); - if (err) - { - zfs_unmount (data); - return err; + data = zfs_mount(device); + if (!data) + return grub_errno; + err = dnode_get_fullpath(path, &(data->subvol), &(data->dnode), + &isfs, data); + if (err) { + grub_dprintf("zfs", "dnode_get_fullpath failed\n"); + zfs_unmount(data); + return err; } + ctx.data = data; - zap_iterate_u64 (&dn, iterate_zap_fs, data, &ctx); + if (isfs) { + grub_uint64_t childobj, 
headobj; + grub_uint64_t snapobj; + dnode_end_t dn; + struct grub_dirhook_info info; + + err = fill_fs_info(&info, data->dnode, data); + if (err) { + zfs_unmount(data); + return err; + } + if (hook ("@", &info, hook_data)) { + zfs_unmount(data); + return GRUB_ERR_NONE; + } - err = dnode_get (&(data->mos), headobj, DMU_OT_DSL_DATASET, &dn, data); - if (err) - { - zfs_unmount (data); - return err; - } - - snapobj = grub_zfs_to_cpu64 (((dsl_dataset_phys_t *) DN_BONUS (&dn.dn))->ds_snapnames_zapobj, dn.endian); - - err = dnode_get (&(data->mos), snapobj, - DMU_OT_DSL_DS_SNAP_MAP, &dn, data); - if (err) - { - zfs_unmount (data); - return err; - } - - zap_iterate_u64 (&dn, iterate_zap_snap, data, &ctx); - } - else - { - if (data->dnode.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) - { - zfs_unmount (data); - return grub_error (GRUB_ERR_BAD_FILE_TYPE, N_("not a directory")); - } - zap_iterate_u64 (&(data->dnode), iterate_zap, data, &ctx); - } - zfs_unmount (data); - return grub_errno; -} + childobj = grub_zfs_to_cpu64( + ((dsl_dir_phys_t *) + DN_BONUS (&data->dnode.dn))->dd_child_dir_zapobj, + data->dnode.endian); + + headobj = grub_zfs_to_cpu64( + ((dsl_dir_phys_t *) + DN_BONUS (&data->dnode.dn))->dd_head_dataset_obj, + data->dnode.endian); + err = dnode_get (&(data->mos), childobj, + DMU_OT_DSL_DIR_CHILD_MAP, &dn, data); + if (err) { + zfs_unmount(data); + return err; + } -static int -check_feature (const char *name, grub_uint64_t val, - struct grub_zfs_dir_ctx *ctx __attribute__((unused))) -{ - int i; - if (val == 0) - return 0; - if (name[0] == 0) - return 0; - for (i = 0; spa_feature_names[i] != NULL; i++) - if (grub_strcmp (name, spa_feature_names[i]) == 0) - return 0; - return 1; -} + zap_iterate_u64(&dn, iterate_zap_fs, data, &ctx); -/* - * Checks whether the MOS features that are active are supported by this - * (GRUB's) implementation of ZFS. - * - * Return: - * 0: Success. - * errnum: Failure. 
- */ - -static int -check_mos_features(dnode_phys_t *mosmdn_phys,grub_zfs_endian_t endian,struct grub_zfs_data* data ) -{ - grub_uint64_t objnum; - grub_uint8_t errnum = 0; - dnode_end_t dn,mosmdn; - mzap_phys_t* mzp; - grub_zfs_endian_t endianzap; - int size; - grub_memmove(&(mosmdn.dn),mosmdn_phys,sizeof(dnode_phys_t)); - mosmdn.endian=endian; - errnum = dnode_get(&mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, &dn,data); - if (errnum != 0) - return errnum; + err = dnode_get(&(data->mos), headobj, + DMU_OT_DSL_DATASET, &dn, data); + if (err) { + zfs_unmount(data); + return err; + } - /* - * Find the object number for 'features_for_read' and retrieve its - * corresponding dnode. Note that we don't check features_for_write - * because GRUB is not opening the pool for write. - */ - errnum = zap_lookup(&dn, DMU_POOL_FEATURES_FOR_READ, &objnum, data,0); - if (errnum != 0) - return errnum; - - errnum = dnode_get(&mosmdn, objnum, DMU_OTN_ZAP_METADATA, &dn, data); - if (errnum != 0) - return errnum; - - errnum = dmu_read(&dn, 0, (void**)&mzp, &endianzap,data); - if (errnum != 0) - return errnum; + snapobj = grub_zfs_to_cpu64( + ((dsl_dataset_phys_t *) + DN_BONUS (&dn.dn))->ds_snapnames_zapobj, dn.endian); + + err = dnode_get(&(data->mos), snapobj, + DMU_OT_DSL_DS_SNAP_MAP, &dn, data); + if (err) { + zfs_unmount(data); + return err; + } - size = grub_zfs_to_cpu16 (dn.dn.dn_datablkszsec, dn.endian) << SPA_MINBLOCKSHIFT; - return mzap_iterate (mzp,endianzap, size, check_feature,NULL); + zap_iterate_u64(&dn, iterate_zap_snap, data, &ctx); + } else { + if (data->dnode.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) { + zfs_unmount (data); + return grub_error(GRUB_ERR_BAD_FILE_TYPE, + N_("not a directory")); + } + zap_iterate_u64(&(data->dnode), iterate_zap, data, &ctx); + } + zfs_unmount(data); + return grub_errno; } @@ -4260,6 +6154,7 @@ } #endif +/* GRUB2 zfs FS module interface structure */ static struct grub_fs grub_zfs_fs = { .name = "zfs", .dir = grub_zfs_dir, 
@@ -4272,7 +6167,7 @@ #ifdef GRUB_UTIL .embed = grub_zfs_embed, .reserved_first_sector = 1, - .blocklist_install = 0, + .blocklist_install = 1, #endif .next = 0 }; @@ -4288,5 +6183,6 @@ GRUB_MOD_FINI (zfs) { + zfs_free_caches (); grub_fs_unregister (&grub_zfs_fs); } diff -Naur grub-2.02~beta2_/grub-core/fs/zfs/zfs_lz4.c grub-2.02~beta2/grub-core/fs/zfs/zfs_lz4.c --- grub-2.02~beta2_/grub-core/fs/zfs/zfs_lz4.c 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/grub-core/fs/zfs/zfs_lz4.c 2015-10-03 17:18:23.000000000 -0700 @@ -33,12 +33,38 @@ */ #include +#include #include #include +#include +#include #include -static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, - int isize, int maxOutputSize); +static grub_err_t LZ4_uncompress_unknownOutputSize(const grub_uint8_t *source, + char *dest, grub_uint32_t isize, grub_size_t maxOutputSize); +grub_err_t +lz4_decompress (void *s_start, void *d_start, grub_size_t s_len, + grub_size_t d_len); + +grub_err_t +lz4_decompress (void *s_start, void *d_start, grub_size_t s_len, + grub_size_t d_len) +{ + const grub_uint8_t *src = s_start; + grub_uint32_t bufsiz = ((grub_uint32_t) src[0] << 24) | (src[1] << 16) | (src[2] << 8) | + src[3]; + + /* invalid compressed buffer size encoded at start (checked without 32-bit wraparound) */ + if (s_len < 4 || bufsiz > s_len - 4) + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); + + /* + * Returns GRUB_ERR_NONE on success and the grub_error () result + * (GRUB_ERR_BAD_FS) when the decoder rejects the compressed stream. + */ + return LZ4_uncompress_unknownOutputSize(src + 4, d_start, bufsiz, + d_len); +} /* * CPU Feature Detection @@ -52,8 +78,33 @@ #endif /* + * Little Endian or Big Endian? + * Note: overwrite the below #define if you know your architecture endianness. 
+ */ +#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ + defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ + defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ + defined(__powerpc) || defined(powerpc) || \ + ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) +#define LZ4_BIG_ENDIAN 1 +#else + /* + * Little Endian assumed. PDP Endian and other very rare endian format + * are unsupported. + */ +#endif + +/* * Compiler Options */ +#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L /* C99 */ +/* "restrict" is a known keyword */ +#else +/* Disable restrict */ +#if !defined restrict +#define restrict +#endif +#endif #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) @@ -73,7 +124,6 @@ #define U32 grub_uint32_t #define S32 grub_int32_t #define U64 grub_uint64_t -typedef grub_size_t size_t; typedef struct _U16_S { U16 v; @@ -125,49 +175,30 @@ #define INITBASE(base) const int base = 0 #endif -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - grub_le_to_cpu16 (A16 (p)); } -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = grub_cpu_to_le16 (v); p += 2; } +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + { d = (s) - grub_le_to_cpu16(A16(p)); } +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + { A16(p) = grub_cpu_to_le16(v); p += 2; } /* Macros */ #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); /* Decompression functions */ -grub_err_t -lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len); - -grub_err_t -lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) -{ - const BYTE *src = s_start; - U32 bufsiz = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | - src[3]; - /* invalid compressed buffer size encoded at start */ - if (bufsiz + 4 > s_len) - return grub_error(GRUB_ERR_BAD_FS,"lz4 decompression failed."); - - /* - * Returns 0 on success (decompression function returned non-negative) - * and appropriate error on failure (decompression function 
returned negative). - */ - return (LZ4_uncompress_unknownOutputSize((char*)s_start + 4, d_start, bufsiz, - d_len) < 0)?grub_error(GRUB_ERR_BAD_FS,"lz4 decompression failed."):0; -} - -static int -LZ4_uncompress_unknownOutputSize(const char *source, - char *dest, int isize, int maxOutputSize) +static grub_err_t +LZ4_uncompress_unknownOutputSize(const grub_uint8_t *source, + char *dest, grub_uint32_t isize, grub_size_t maxOutputSize) { /* Local Variables */ - const BYTE * ip = (const BYTE *) source; + const BYTE *restrict ip = (const BYTE *) source; const BYTE *const iend = ip + isize; - const BYTE * ref; + const BYTE *restrict ref; - BYTE * op = (BYTE *) dest; + BYTE *restrict op = (BYTE *) dest; BYTE *const oend = op + maxOutputSize; BYTE *cpy; - size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 }; + grub_size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 }; /* Main Loop */ while (ip < iend) { @@ -185,6 +216,10 @@ } /* copy literals */ cpy = op + length; + /* CORNER-CASE: cpy might overflow. */ + if (cpy < op) /* cpy was overflowed, bail! */ + return grub_error (GRUB_ERR_BAD_FS, + "lz4 decompression failed"); if ((cpy > oend - COPYLENGTH) || (ip + length > iend - COPYLENGTH)) { if (cpy > oend) @@ -192,19 +227,19 @@ * Error: request to write beyond destination * buffer. */ - goto _output_error; + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); if (ip + length > iend) /* * Error : request to read beyond source * buffer. */ - goto _output_error; + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); grub_memcpy(op, ip, length); op += length; ip += length; if (ip < iend) /* Error : LZ4 format violation */ - goto _output_error; + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); /* Necessarily EOF, due to parsing restrictions. */ break; } @@ -220,7 +255,7 @@ * Error: offset creates reference outside of * destination buffer. 
*/ - goto _output_error; + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); /* get matchlength */ if ((length = (token & ML_MASK)) == ML_MASK) { @@ -235,8 +270,8 @@ /* copy repeated sequence */ if unlikely(op - ref < STEPSIZE) { #if LZ4_ARCH64 - size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; - size_t dec2 = dec2table[op - ref]; + grub_size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + grub_size_t dec2 = dec2table[op - ref]; #else const int dec2 = 0; #endif @@ -258,7 +293,7 @@ * Error: request to write outside of * destination buffer. */ - goto _output_error; + return grub_error (GRUB_ERR_BAD_FS, "lz4 decompression failed"); LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; @@ -276,9 +311,5 @@ } /* end of decoding */ - return (int)(((char *)op) - dest); - - /* write overflow error detected */ - _output_error: - return (int)(-(((char *)ip) - source)); + return GRUB_ERR_NONE; } diff -Naur grub-2.02~beta2_/grub-core/fs/zfs/zfscrypt.c grub-2.02~beta2/grub-core/fs/zfs/zfscrypt.c --- grub-2.02~beta2_/grub-core/fs/zfs/zfscrypt.c 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/grub-core/fs/zfs/zfscrypt.c 2015-10-03 17:18:23.000000000 -0700 @@ -265,7 +265,8 @@ mac_out, nonce, 15 - l, m); default: - return GPG_ERR_CIPHER_ALGO; + return grub_error(GPG_ERR_CIPHER_ALGO, "algorithm %" + PRIuGRUB_UINT64_T " is not supported yet", algo); } } diff -Naur grub-2.02~beta2_/grub-core/fs/zfs/zfsinfo.c grub-2.02~beta2/grub-core/fs/zfs/zfsinfo.c --- grub-2.02~beta2_/grub-core/fs/zfs/zfsinfo.c 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/grub-core/fs/zfs/zfsinfo.c 2015-10-03 17:18:23.000000000 -0700 @@ -193,10 +193,16 @@ return GRUB_ERR_NONE; } -static grub_err_t -get_bootpath (char *nvlist, char **bootpath, char **devid) +#ifndef ZVBOOT +static +#endif /* ZVBOOT */ +grub_err_t +get_bootpath (char *nvlist, grub_uint64_t devguid, char **bootpath, + char **devid) { char *type = 0; + grub_uint64_t curguid; + int found; type = 
grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_TYPE); @@ -205,20 +211,29 @@ if (grub_strcmp (type, VDEV_TYPE_DISK) == 0) { + found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID, + &curguid); + + *bootpath = 0; + *devid = 0; + + if (found && curguid != devguid) + { + return GRUB_ERR_NONE; + } + + if (grub_zfs_vdev_validate(nvlist)) /* device error */ + return GRUB_ERR_NONE; + *bootpath = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_PHYS_PATH); *devid = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_DEVID); - if (!*bootpath || !*devid) - { - grub_free (*bootpath); - grub_free (*devid); - *bootpath = 0; - *devid = 0; - } + return GRUB_ERR_NONE; } - if (grub_strcmp (type, VDEV_TYPE_MIRROR) == 0) + if (grub_strcmp (type, VDEV_TYPE_MIRROR) == 0 || + grub_strcmp (type, VDEV_TYPE_RAIDZ) == 0) { int nelm, i; @@ -233,11 +248,18 @@ ZPOOL_CONFIG_CHILDREN, i); - get_bootpath (child, bootpath, devid); + if (! child) + { + grub_printf_ (N_("Unable to lookup child vdevs for pool: %s\n"), + grub_errmsg); + return grub_errno; + } + + get_bootpath (child, devguid, bootpath, devid); grub_free (child); - if (*bootpath && *devid) + if (*bootpath || *devid) return GRUB_ERR_NONE; } } @@ -274,7 +296,7 @@ int found; if (argc < 1) - return grub_error (GRUB_ERR_BAD_ARGUMENT, N_("one argument expected")); + return grub_error (GRUB_ERR_BAD_ARGUMENT, N_("device name required")); if (args[0][0] == '(' && args[0][grub_strlen (args[0]) - 1] == ')') { @@ -348,12 +370,13 @@ char *nv = 0; char *bootpath = 0, *devid = 0; char *fsname; - char *bootfs; - char *poolname; - grub_uint64_t mdnobj; + char *bootfs = NULL; + char *poolname = NULL; + grub_uint64_t mdnobj, devguid; + int found; if (argc < 1) - return grub_error (GRUB_ERR_BAD_ARGUMENT, N_("one argument expected")); + return grub_error (GRUB_ERR_BAD_ARGUMENT, N_("filesystem name required")); devname = grub_file_get_device_name (args[0]); if (grub_errno) @@ -378,25 +401,35 @@ grub_device_close (dev); if (err) - return 
err; + { + grub_free (nvlist); + return err; + } + + found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID, &devguid); + + if (!found) + { + if (!grub_errno) + grub_error(GRUB_ERR_BAD_FS, N_("No virtual device GUID found")); + grub_free (nvlist); + return grub_errno; + } poolname = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_POOL_NAME); if (!poolname) { if (!grub_errno) grub_error (GRUB_ERR_BAD_FS, "No poolname found"); + grub_free (nvlist); return grub_errno; } nv = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE); - if (nv) - get_bootpath (nv, &bootpath, &devid); - - grub_free (nv); - grub_free (nvlist); - - bootfs = grub_xasprintf ("zfs-bootfs=%s/%llu%s%s%s%s%s%s", + if (nv && (get_bootpath (nv, devguid, &bootpath, &devid) == GRUB_ERR_NONE)) + { + bootfs = grub_xasprintf ("zfs-bootfs=%s/%llu%s%s%s%s%s%s", poolname, (unsigned long long) mdnobj, bootpath ? ",bootpath=\"" : "", bootpath ? : "", @@ -404,6 +437,11 @@ devid ? ",diskdevid=\"" : "", devid ? : "", devid ? 
"\"" : ""); + } + grub_free (nv); + grub_free (nvlist); + grub_free (poolname); + if (!bootfs) return grub_errno; if (argc >= 2) @@ -412,28 +450,159 @@ grub_printf ("%s\n", bootfs); grub_free (bootfs); - grub_free (poolname); grub_free (bootpath); grub_free (devid); return GRUB_ERR_NONE; } +static grub_err_t +grub_cmd_zfs_defaultbootfs (grub_command_t cmd __attribute__ ((unused)), + int argc, char **args) +{ + grub_device_t dev; + char *devname = 0; + grub_err_t err; + char *nvlist = 0; + char *nv = 0; + char *bootpath = 0, *devid = 0; + char *bootfs = 0; + char *poolname; + char *bootfsname = 0; + grub_uint64_t mdnobj, devguid; + int found; + + if (argc < 1) + return grub_error (GRUB_ERR_BAD_ARGUMENT, N_("variable name required")); + + if (argc >= 2) + { + /* args[0] is the DEVICE name */ + if (args[0][0] == '(' && args[0][grub_strlen (args[0]) - 1] == ')') + { + devname = grub_strdup (args[0] + 1); + if (devname) + devname[grub_strlen (devname) - 1] = 0; + } + else + devname = grub_strdup (args[0]); + } + else /* args[0] is the variable, we'll use root device */ + devname = grub_strdup(grub_env_get("root")); + + if (grub_errno) + { + grub_free(devname); + return grub_errno; + } + + dev = grub_device_open (devname); + grub_free (devname); + if (!dev) + return grub_errno; + + err = grub_zfs_fetch_nvlist (dev, &nvlist); + if (!err) + err = grub_zfs_defaultbootfsobj (dev, &mdnobj); + + if (err) + { + grub_device_close (dev); + grub_free (nvlist); + return err; + } + + err = grub_zfs_defaultbootfsname (dev, &bootfsname); + + grub_device_close (dev); + + if (err) + { + grub_free (nvlist); + return err; + } + + if (argc >= 3) + { + found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID, + &devguid); + if (!found) + { + if (!grub_errno) + grub_error(GRUB_ERR_BAD_FS, N_("No virtual device GUID found")); + grub_free (nvlist); + grub_free (bootfsname); + return grub_errno; + } + + poolname = grub_zfs_nvlist_lookup_string (nvlist, ZPOOL_CONFIG_POOL_NAME); + if 
(!poolname) + { + if (!grub_errno) + grub_error (GRUB_ERR_BAD_FS, N_("No poolname found")); + grub_free (nvlist); + grub_free (bootfsname); + return grub_errno; + } + + nv = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE); + + if (nv && (get_bootpath (nv, devguid, &bootpath, &devid) == GRUB_ERR_NONE)) + { + bootfs = grub_xasprintf ("zfs-bootfs=%s/%llu%s%s%s%s%s%s", + poolname, (unsigned long long) mdnobj, + bootpath ? ",bootpath=\"" : "", + bootpath ? : "", + bootpath ? "\"" : "", + devid ? ",diskdevid=\"" : "", + devid ? : "", + devid ? "\"" : ""); + grub_free (nv); + grub_free (poolname); + grub_free (bootpath); + grub_free (devid); + if (!bootfs) + { + grub_free (nvlist); + return grub_errno; + } + grub_dprintf ("zfs", "bootfs=%s\n", bootfs); + grub_env_set (args[2], bootfs); + } + } + + grub_dprintf ("zfs", "bootfsname=%s\n", bootfsname); + + if (argc >= 2) + grub_env_set (args[1], bootfsname); + else + grub_env_set (args[0], bootfsname); + + grub_free (nvlist); + grub_free (bootfs); + grub_free (bootfsname); + + return GRUB_ERR_NONE; +} -static grub_command_t cmd_info, cmd_bootfs; +static grub_command_t cmd_info, cmd_bootfs, cmd_defaultbootfs; -GRUB_MOD_INIT (zfsinfo) +GRUB_MOD_INIT(zfsinfo) { - cmd_info = grub_register_command ("zfsinfo", grub_cmd_zfsinfo, - N_("DEVICE"), - N_("Print ZFS info about DEVICE.")); + cmd_info = grub_register_command("zfsinfo", grub_cmd_zfsinfo, + N_("DEVICE"), N_("Print ZFS info about DEVICE.")); cmd_bootfs = grub_register_command ("zfs-bootfs", grub_cmd_zfs_bootfs, - N_("FILESYSTEM [VARIABLE]"), - N_("Print ZFS-BOOTFSOBJ or store it into VARIABLE")); + N_("FILESYSTEM [VARIABLE]"), + N_("Print ZFS-BOOTFSOBJ or store it into VARIABLE")); + cmd_defaultbootfs = grub_register_command ("zfs-defaultbootfs", + grub_cmd_zfs_defaultbootfs, + N_("VARIABLE or zfs-defaultbootfs DEVICE VARIABLE-A or zfs-defaultbootfs DEVICE VARIABLE-A VARIABLE-B"), + N_("Set default bootfs name of root device or given DEVICE to VARIABLE-A or set 
default bootfsname to VARIABLE-A and ZFS-BOOTFSOBJ to VARIABLE-B")); } GRUB_MOD_FINI (zfsinfo) { grub_unregister_command (cmd_info); grub_unregister_command (cmd_bootfs); + grub_unregister_command (cmd_defaultbootfs); } diff -Naur grub-2.02~beta2_/grub-core/partmap/gpt.c grub-2.02~beta2/grub-core/partmap/gpt.c --- grub-2.02~beta2_/grub-core/partmap/gpt.c 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/grub-core/partmap/gpt.c 2015-10-03 17:18:23.000000000 -0700 @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include GRUB_MOD_LICENSE ("GPLv3+"); @@ -37,6 +39,7 @@ #ifdef GRUB_UTIL static const grub_gpt_part_type_t grub_gpt_partition_type_bios_boot = GRUB_GPT_PARTITION_TYPE_BIOS_BOOT; +static const grub_gpt_part_type_t grub_gpt_partition_type_zfs = GRUB_GPT_PARTITION_TYPE_ZFS; #endif /* 512 << 7 = 65536 byte sectors. */ @@ -162,6 +165,12 @@ return 1; } + if (! grub_memcmp (&gptdata.type, &grub_gpt_partition_type_zfs, 16)) + { + ctx->start = p->start + (VDEV_BOOT_OFFSET >> GRUB_DISK_SECTOR_BITS); + ctx->len = (VDEV_BOOT_SIZE >> GRUB_DISK_SECTOR_BITS); + return 1; + } return 0; } diff -Naur grub-2.02~beta2_/include/grub/gpt_partition.h grub-2.02~beta2/include/grub/gpt_partition.h --- grub-2.02~beta2_/include/grub/gpt_partition.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/gpt_partition.h 2015-10-03 17:18:23.000000000 -0700 @@ -50,6 +50,13 @@ { 0x85, 0xD2, 0xE1, 0xE9, 0x04, 0x34, 0xCF, 0xB3 } \ } +#define GRUB_GPT_PARTITION_TYPE_ZFS \ + { grub_cpu_to_le32_compile_time (0x6A898CC3U),\ + grub_cpu_to_le16_compile_time (0x1DD2), \ + grub_cpu_to_le16_compile_time (0x11B2), \ + { 0x99, 0xA6, 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } \ + } + struct grub_gpt_header { grub_uint8_t magic[8]; diff -Naur grub-2.02~beta2_/include/grub/zfs/dmu.h grub-2.02~beta2/include/grub/zfs/dmu.h --- grub-2.02~beta2_/include/grub/zfs/dmu.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/dmu.h 2015-10-03 17:18:23.000000000 -0700 
@@ -22,39 +22,6 @@ #ifndef _SYS_DMU_H #define _SYS_DMU_H -#define B_FALSE 0 -#define B_TRUE 1 - -#define DMU_OT_NEWTYPE 0x80 -#define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f - -#define DMU_OT(byteswap, metadata) \ - (DMU_OT_NEWTYPE | \ - ((metadata) ? DMU_OT_METADATA : 0) | \ - ((byteswap) & DMU_OT_BYTESWAP_MASK)) - -#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ - (ot) < DMU_OT_NUMTYPES) - -#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - ((ot) & DMU_OT_METADATA) : \ - dmu_ot[(ot)].ot_metadata) - -typedef enum dmu_object_byteswap { - DMU_BSWAP_UINT8, - DMU_BSWAP_UINT16, - DMU_BSWAP_UINT32, - DMU_BSWAP_UINT64, - DMU_BSWAP_ZAP, - DMU_BSWAP_DNODE, - DMU_BSWAP_OBJSET, - DMU_BSWAP_ZNODE, - DMU_BSWAP_OLDACL, - DMU_BSWAP_ACL, - DMU_BSWAP_NUMFUNCS -} dmu_object_byteswap_t; /* * This file describes the interface that the DMU provides for its @@ -63,6 +30,41 @@ * The DMU also interacts with the SPA. That interface is described in * dmu_spa.h. */ + +#define B_FALSE 0 +#define B_TRUE 1 + +#define DMU_OT_NEWTYPE 0x80 +#define DMU_OT_METADATA 0x40 +#define DMU_OT_BYTESWAP_MASK 0x3f + +#define DMU_OT(byteswap, metadata) \ + (DMU_OT_NEWTYPE | \ + ((metadata) ? DMU_OT_METADATA : 0) | \ + ((byteswap) & DMU_OT_BYTESWAP_MASK)) + +#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ + (ot) < DMU_OT_NUMTYPES) + +#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? 
\ + ((ot) & DMU_OT_METADATA) : \ + dmu_ot[(ot)].ot_metadata) + +typedef enum dmu_object_byteswap { + DMU_BSWAP_UINT8, + DMU_BSWAP_UINT16, + DMU_BSWAP_UINT32, + DMU_BSWAP_UINT64, + DMU_BSWAP_ZAP, + DMU_BSWAP_DNODE, + DMU_BSWAP_OBJSET, + DMU_BSWAP_ZNODE, + DMU_BSWAP_OLDACL, + DMU_BSWAP_ACL, + DMU_BSWAP_NUMFUNCS +} dmu_object_byteswap_t; + typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ @@ -122,17 +124,22 @@ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_DSL_KEYCHAIN = 54, + DMU_OT_SHARE, /* UINT64 */ + DMU_OT_DSL_SHARES, /* ZAP */ + DMU_OT_BPMAP_ARRAY, + DMU_OT_BPMAP_DEFER, DMU_OT_NUMTYPES, - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE) } dmu_object_type_t; typedef enum dmu_objset_type { @@ -160,5 +167,9 @@ #define DMU_POOL_PROPS "pool_props" #define DMU_POOL_L2CACHE "l2cache" #define DMU_POOL_FEATURES_FOR_READ 
"features_for_read" +#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" +#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" +#define DMU_POOL_BPMAP_OBJ "bpmap-vdev-%llu-%llu" +#define DMU_POOL_BPMAP_DEFER "bpmap_defer_obj" #endif /* _SYS_DMU_H */ diff -Naur grub-2.02~beta2_/include/grub/zfs/dnode.h grub-2.02~beta2/include/grub/zfs/dnode.h --- grub-2.02~beta2_/include/grub/zfs/dnode.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/dnode.h 2015-10-03 17:18:23.000000000 -0700 @@ -18,6 +18,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2014 by Delphix. All rights reserved. */ #ifndef _SYS_DNODE_H @@ -29,6 +30,8 @@ * Fixed constants. */ #define DNODE_SHIFT 9 /* 512 bytes */ +#define DN_MIN_INDBLKSHIFT 12 /* 4k */ +#define DN_MAX_INDBLKSHIFT 14 /* 16k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ diff -Naur grub-2.02~beta2_/include/grub/zfs/spa.h grub-2.02~beta2/include/grub/zfs/spa.h --- grub-2.02~beta2_/include/grub/zfs/spa.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/spa.h 2015-10-03 17:18:23.000000000 -0700 @@ -1,7 +1,7 @@ /* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004,2009 Free Software Foundation, Inc. - * Copyright 2010 Sun Microsystems, Inc. + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. 
* * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,6 +67,8 @@ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) #define SPA_MINBLOCKSHIFT 9 +#define SPA_MAXBLOCKSHIFT 24 +#define SPA_128KBLOCKSHIFT 17 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) /* @@ -126,7 +128,7 @@ * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -160,7 +162,8 @@ * G gang block indicator * B byteorder (endianness) * D dedup - * X unused + * X encryption (on version 30, which is not supported in openzfs) + * E blkptr_t contains embedded data * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg @@ -195,18 +198,38 @@ #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) #define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) -#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) +#define DVA_GET_OFFSET(dva, e) \ + BF64_GET_SB((grub_zfs_to_cpu64 ((dva)->dva_word[1], (e))), 0, 63, \ + SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_OFFSET(dva, x) \ + BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GANG(dva, e) \ + BF64_GET((grub_zfs_to_cpu64 ((dva)->dva_word[1], (e))), 63, 1) /* G is bit 63, as in DVA_SET_GANG; bits 0-62 are the offset */ #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) -#define BP_GET_LSIZE(bp) \ - BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) +#define BP_GET_LSIZE(bp, e) \ + (BP_IS_HOLE(bp, e) ? 
0 : \ + BF64_GET_SB((grub_zfs_to_cpu64((bp)->blk_prop, e)), 0, SPA_LSIZEBITS, \ + SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_PSIZE(bp, e) \ + BF64_GET_SB((grub_zfs_to_cpu64((bp)->blk_prop, e)), 16, SPA_PSIZEBITS, \ + SPA_MINBLOCKSHIFT, 1) +#define BP_SET_PSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_COMPRESS(bp, e) \ + BF64_GET(grub_zfs_to_cpu64((bp)->blk_prop, e), 32, 7) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x) -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) +#define BP_IS_EMBEDDED(bp, e) \ + BF64_GET(grub_zfs_to_cpu64((bp)->blk_prop, e), 39, 1) -#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) +#define BP_GET_CHECKSUM(bp, e) \ + BF64_GET(grub_zfs_to_cpu64((bp)->blk_prop, e), 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) #define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) @@ -278,8 +301,15 @@ } #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) -#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) -#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) +#define BP_IS_GANG(bp, e) \ + ((BP_IS_EMBEDDED((bp), (e))) ? 
B_FALSE : \ + (DVA_GET_GANG((BP_IDENTITY(bp)),(e)))) +#define DVA_IS_EMPTY(dva) \ + ((dva)->dva_word[0] == 0ULL && \ + (dva)->dva_word[1] == 0ULL) + +#define BP_IS_HOLE(bp, e) \ + (!BP_IS_EMBEDDED(bp, e) && DVA_IS_EMPTY(BP_IDENTITY(bp))) /* BP_IS_RAIDZ(bp) assumes no block compression */ #define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \ @@ -302,6 +332,22 @@ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } +#define BPE_GET_ETYPE(bp, e) BP_GET_CHECKSUM(bp, e) +#define BPE_GET_LSIZE(bp, e) \ + BF64_GET_SB((grub_zfs_to_cpu64 ((bp)->blk_prop, e)), 0, 25, 0, 1) +#define BPE_GET_PSIZE(bp, e) \ + BF64_GET_SB((grub_zfs_to_cpu64 ((bp)->blk_prop, e)), 25, 7, 0, 1) + +typedef enum bp_embedded_type { + BP_EMBEDDED_TYPE_DATA, + NUM_BP_EMBEDDED_TYPES +} bp_embedded_type_t; + +#define BPE_NUM_WORDS 14 +#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (grub_uint64_t)) +#define BPE_IS_PAYLOADWORD(bp, wp) \ + ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) + #define BP_SPRINTF_LEN 320 #endif /* ! GRUB_ZFS_SPA_HEADER */ diff -Naur grub-2.02~beta2_/include/grub/zfs/uberblock_impl.h grub-2.02~beta2/include/grub/zfs/uberblock_impl.h --- grub-2.02~beta2_/include/grub/zfs/uberblock_impl.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/uberblock_impl.h 2015-10-03 17:18:23.000000000 -0700 @@ -1,6 +1,7 @@ /* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. * * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,10 +16,6 @@ * You should have received a copy of the GNU General Public License * along with GRUB. If not, see . */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ #ifndef _SYS_UBERBLOCK_IMPL_H #define _SYS_UBERBLOCK_IMPL_H @@ -43,18 +40,21 @@ grub_uint64_t ub_guid_sum; /* sum of all vdev guids */ grub_uint64_t ub_timestamp; /* UTC time of last sync */ blkptr_t ub_rootbp; /* MOS objset_phys_t */ + + /* highest SPA_VERSION supported by software that wrote this txg */ + grub_uint64_t ub_software_version; } uberblock_t; +#define ZFS_MAX(x,y) (((x) > (y)) ? (x) : (y)) +#define ZFS_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define MAX_UBERBLOCK_SHIFT (13) #define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) -#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT - -/* XXX Uberblock_phys_t is no longer in the kernel zfs */ -typedef struct uberblock_phys { - uberblock_t ubp_uberblock; - char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - - sizeof (zio_eck_t)]; - zio_eck_t ubp_zec; -} uberblock_phys_t; - +#define VDEV_UBERBLOCK_SHIFT(a) \ + (ZFS_MIN((ZFS_MAX(UBERBLOCK_SHIFT, (a))), MAX_UBERBLOCK_SHIFT)) +#define VDEV_UBERBLOCK_COUNT(a) \ + (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(a)) +#define VDEV_UBERBLOCK_OFFSET(a, n) \ + offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(a)]) +#define VDEV_UBERBLOCK_SIZE(a) (1ULL << VDEV_UBERBLOCK_SHIFT(a)) #endif /* _SYS_UBERBLOCK_IMPL_H */ diff -Naur grub-2.02~beta2_/include/grub/zfs/vdev_impl.h grub-2.02~beta2/include/grub/zfs/vdev_impl.h --- grub-2.02~beta2_/include/grub/zfs/vdev_impl.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/vdev_impl.h 2015-10-03 17:18:23.000000000 -0700 @@ -23,6 +23,8 @@ #ifndef _SYS_VDEV_IMPL_H #define _SYS_VDEV_IMPL_H +#define VDEV_RAIDZ_MAXPARITY 3 + #define VDEV_SKIP_SIZE (8 << 10) #define VDEV_BOOT_HEADER_SIZE (8 << 10) #define VDEV_PHYS_SIZE (112 << 10) diff -Naur grub-2.02~beta2_/include/grub/zfs/zap_impl.h grub-2.02~beta2/include/grub/zfs/zap_impl.h --- grub-2.02~beta2_/include/grub/zfs/zap_impl.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/zap_impl.h 2015-10-03 17:18:23.000000000 -0700 @@ -1,6 +1,7 @@ 
/* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. * * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,10 +16,6 @@ * You should have received a copy of the GNU General Public License * along with GRUB. If not, see . */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ #ifndef _SYS_ZAP_IMPL_H #define _SYS_ZAP_IMPL_H @@ -28,6 +25,8 @@ #define ZAP_HASHBITS 28 #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) +#define MZAP_MAX_BLKSHIFT SPA_128KBLOCKSHIFT +#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) typedef struct mzap_ent_phys { grub_uint64_t mze_value; diff -Naur grub-2.02~beta2_/include/grub/zfs/zfs.h grub-2.02~beta2/include/grub/zfs/zfs.h --- grub-2.02~beta2_/include/grub/zfs/zfs.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/zfs.h 2015-10-03 17:18:23.000000000 -0700 @@ -1,6 +1,7 @@ /* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004,2009 Free Software Foundation, Inc. + * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,9 +16,6 @@ * You should have received a copy of the GNU General Public License * along with GRUB. If not, see . */ - /* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
- */ #ifndef GRUB_ZFS_HEADER #define GRUB_ZFS_HEADER 1 @@ -38,10 +36,12 @@ */ #define SPA_VERSION_INITIAL 1ULL #define SPA_VERSION_BEFORE_FEATURES 33ULL +#define SPA_VERSION_ORACLE 35ULL +#define SPA_VERSION 5000ULL #define SPA_VERSION_FEATURES 5000ULL #define SPA_VERSION_IS_SUPPORTED(v) \ - (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ - ((v) == SPA_VERSION_FEATURES)) + (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_ORACLE) || \ + ((v) >= SPA_VERSION_FEATURES && (v) == SPA_VERSION)) /* * The following are configuration names used in the nvlist describing a pool's * configuration. @@ -124,16 +124,18 @@ grub_err_t grub_zfs_fetch_nvlist (grub_device_t dev, char **nvlist); grub_err_t grub_zfs_getmdnobj (grub_device_t dev, const char *fsfilename, grub_uint64_t *mdnobj); - +grub_err_t grub_zfs_defaultbootfsobj (grub_device_t dev, grub_uint64_t *mdnobj); +grub_err_t grub_zfs_defaultbootfsname (grub_device_t dev, char **bootfsname); char *grub_zfs_nvlist_lookup_string (const char *nvlist, const char *name); char *grub_zfs_nvlist_lookup_nvlist (const char *nvlist, const char *name); int grub_zfs_nvlist_lookup_uint64 (const char *nvlist, const char *name, grub_uint64_t *out); char *grub_zfs_nvlist_lookup_nvlist_array (const char *nvlist, const char *name, - grub_size_t array_index); + grub_size_t index); int grub_zfs_nvlist_lookup_nvlist_array_get_nelm (const char *nvlist, const char *name); +int grub_zfs_vdev_validate (const char *nv); grub_err_t grub_zfs_add_key (grub_uint8_t *key_in, grub_size_t keylen, diff -Naur grub-2.02~beta2_/include/grub/zfs/zfs_znode.h grub-2.02~beta2/include/grub/zfs/zfs_znode.h --- grub-2.02~beta2_/include/grub/zfs/zfs_znode.h 2013-12-24 08:29:27.000000000 -0800 +++ grub-2.02~beta2/include/grub/zfs/zfs_znode.h 2015-10-03 17:18:23.000000000 -0700 @@ -1,6 +1,7 @@ /* * GRUB -- GRand Unified Bootloader * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. * * GRUB is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,10 +16,6 @@ * You should have received a copy of the GNU General Public License * along with GRUB. If not, see . */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ #ifndef _SYS_FS_ZFS_ZNODE_H #define _SYS_FS_ZFS_ZNODE_H @@ -30,7 +27,7 @@ #define ZPL_VERSION_STR "VERSION" #define ZFS_SA_ATTRS "SA_ATTRS" -#define ZPL_VERSION 5ULL +#define ZPL_VERSION 6ULL #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)