zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

g_raid3.h (14604B) - Raw


      1 /*-
      2  * SPDX-License-Identifier: BSD-2-Clause
      3  *
      4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #ifndef	_G_RAID3_H_
     30 #define	_G_RAID3_H_
     31 
     32 #include <sys/endian.h>
     33 #include <sys/md5.h>
     34 
/* Class name string for this GEOM class. */
#define	G_RAID3_CLASS_NAME	"RAID3"

/*
 * Magic string for RAID3 metadata (11 characters plus NUL, so it fits the
 * 16-byte md_magic field).
 * NOTE(review): raid3_metadata_decode() copies md_magic but never compares
 * it with this string; presumably callers do that check -- confirm.
 */
#define	G_RAID3_MAGIC		"GEOM::RAID3"
/*
 * Version history:
 * 0 - Initial version number.
 * 1 - Added 'round-robin reading' algorithm.
 * 2 - Added 'verify reading' algorithm.
 * 3 - Added md_genid field to metadata.
 * 4 - Added md_provsize field to metadata.
 * 5 - Added 'no failure synchronization' flag.
 *
 * raid3_metadata_decode() accepts versions 0 through 5 and rejects anything
 * else with EINVAL, so bumping this value requires a matching decoder case.
 */
#define	G_RAID3_VERSION		5
     48 
/*
 * Per-disk flag bits (64-bit values, tested against md_dflags by
 * raid3_metadata_dump()).
 *
 * G_RAID3_DISK_FLAG_MASK ORs together only DIRTY, SYNCHRONIZING and
 * FORCE_SYNC; HARDCODED and BROKEN are not part of it.
 * NOTE(review): presumably the mask selects the bits a user/metadata may
 * carry while the other two are managed internally -- confirm against the
 * users of the mask.
 */
#define	G_RAID3_DISK_FLAG_DIRTY		0x0000000000000001ULL
#define	G_RAID3_DISK_FLAG_SYNCHRONIZING	0x0000000000000002ULL
#define	G_RAID3_DISK_FLAG_FORCE_SYNC	0x0000000000000004ULL
#define	G_RAID3_DISK_FLAG_HARDCODED	0x0000000000000008ULL
#define	G_RAID3_DISK_FLAG_BROKEN	0x0000000000000010ULL
#define	G_RAID3_DISK_FLAG_MASK		(G_RAID3_DISK_FLAG_DIRTY |	\
					 G_RAID3_DISK_FLAG_SYNCHRONIZING | \
					 G_RAID3_DISK_FLAG_FORCE_SYNC)
     57 
/*
 * Per-device flag bits (64-bit values, tested against md_mflags by
 * raid3_metadata_dump()).
 *
 * G_RAID3_DEVICE_FLAG_MASK ORs together all four bits defined here.  The
 * kernel-only DESTROY/WAIT/DESTROYING device flags defined further down use
 * high-order bits and are not included in this mask.
 */
#define	G_RAID3_DEVICE_FLAG_NOAUTOSYNC	0x0000000000000001ULL
#define	G_RAID3_DEVICE_FLAG_ROUND_ROBIN	0x0000000000000002ULL
#define	G_RAID3_DEVICE_FLAG_VERIFY	0x0000000000000004ULL
#define	G_RAID3_DEVICE_FLAG_NOFAILSYNC	0x0000000000000008ULL
#define	G_RAID3_DEVICE_FLAG_MASK	(G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \
					 G_RAID3_DEVICE_FLAG_ROUND_ROBIN | \
					 G_RAID3_DEVICE_FLAG_VERIFY | \
					 G_RAID3_DEVICE_FLAG_NOFAILSYNC)
     66 
     67 #ifdef _KERNEL
/* Debug level variable; defined elsewhere and handed to _GEOM_DEBUG() below. */
extern u_int g_raid3_debug;

/*
 * Logging wrappers around _GEOM_DEBUG() (defined outside this header).
 * Both pass the "GEOM_RAID3" tag, g_raid3_debug and the requested level;
 * G_RAID3_DEBUG passes NULL where G_RAID3_LOGREQ passes the bio 'bp'.
 * NOTE(review): presumably the message is emitted only when 'lvl' does not
 * exceed g_raid3_debug -- that logic lives in _GEOM_DEBUG(); confirm there.
 */
#define	G_RAID3_DEBUG(lvl, ...) \
    _GEOM_DEBUG("GEOM_RAID3", g_raid3_debug, (lvl), NULL, __VA_ARGS__)
#define	G_RAID3_LOGREQ(lvl, bp, ...) \
    _GEOM_DEBUG("GEOM_RAID3", g_raid3_debug, (lvl), (bp), __VA_ARGS__)
     74 
/*
 * Flag bits attached to bios, in two separate namespaces.  Each namespace
 * has a *_MASK macro that ORs together every bit defined for it.
 * NOTE(review): the CFLAG/PFLAG naming suggests these live in a bio's
 * bio_cflags and bio_pflags fields respectively -- confirm in g_raid3.c.
 */
#define	G_RAID3_BIO_CFLAG_REGULAR	0x01
#define	G_RAID3_BIO_CFLAG_SYNC		0x02
#define	G_RAID3_BIO_CFLAG_PARITY	0x04
#define	G_RAID3_BIO_CFLAG_NODISK	0x08
#define	G_RAID3_BIO_CFLAG_REGSYNC	0x10
#define	G_RAID3_BIO_CFLAG_MASK		(G_RAID3_BIO_CFLAG_REGULAR |	\
					 G_RAID3_BIO_CFLAG_SYNC |	\
					 G_RAID3_BIO_CFLAG_PARITY |	\
					 G_RAID3_BIO_CFLAG_NODISK |	\
					 G_RAID3_BIO_CFLAG_REGSYNC)

#define	G_RAID3_BIO_PFLAG_DEGRADED	0x01
#define	G_RAID3_BIO_PFLAG_NOPARITY	0x02
#define	G_RAID3_BIO_PFLAG_VERIFY	0x04
#define	G_RAID3_BIO_PFLAG_MASK		(G_RAID3_BIO_PFLAG_DEGRADED |	\
					 G_RAID3_BIO_PFLAG_NOPARITY |	\
					 G_RAID3_BIO_PFLAG_VERIFY)
     92 
/*
 * Information needed to synchronize a single disk; embedded in
 * struct g_raid3_disk as d_sync.
 */
struct g_raid3_disk_sync {
	struct g_consumer *ds_consumer;	/* Consumer connected to our device. */
	off_t		  ds_offset;	/* Offset of next request to send. */
	off_t		  ds_offset_done; /* Offset of already synchronized
					   region. */
	off_t		  ds_resync;	/* Resynchronize from this offset. */
	u_int		  ds_syncid;	/* Disk's synchronization ID. */
	u_int		  ds_inflight;	/* Number of in-flight sync requests. */
	struct bio	**ds_bios;	/* BIOs for synchronization I/O. */
};
    106 
/*
 * Device-wide information needed for synchronization; embedded in
 * struct g_raid3_softc as sc_sync.
 */
struct g_raid3_device_sync {
	struct g_geom	*ds_geom;	/* Synchronization geom. */
};
    113 
/*
 * Disk states.
 * NOTE(review): presumably the values stored in g_raid3_disk.d_state and
 * passed as 'state' to g_raid3_ndisks() -- confirm against g_raid3.c.
 */
#define	G_RAID3_DISK_STATE_NODISK		0
#define	G_RAID3_DISK_STATE_NONE			1
#define	G_RAID3_DISK_STATE_NEW			2
#define	G_RAID3_DISK_STATE_ACTIVE		3
#define	G_RAID3_DISK_STATE_STALE		4
#define	G_RAID3_DISK_STATE_SYNCHRONIZING	5
#define	G_RAID3_DISK_STATE_DISCONNECTED		6
#define	G_RAID3_DISK_STATE_DESTROY		7
/*
 * Run-time description of one component (disk) of a RAID3 device.
 */
struct g_raid3_disk {
	u_int		 d_no;		/* Disk number. */
	struct g_consumer *d_consumer;	/* Consumer. */
	struct g_raid3_softc *d_softc;	/* Back-pointer to softc. */
	int		 d_state;	/* Disk state. */
	uint64_t	 d_flags;	/* Additional flags. */
	u_int		 d_genid;	/* Disk's generation ID. */
	struct g_raid3_disk_sync d_sync; /* Sync information. */
	LIST_ENTRY(g_raid3_disk) d_next;
};
/*
 * Shorthand so that disk->d_name reads the provider's name.  It expands to
 * a double dereference, so it is only valid while d_consumer and its
 * provider are non-NULL.  Being a plain object-like macro, it also rewrites
 * every later 'd_name' token in any file that includes this header.
 */
#define	d_name	d_consumer->provider->name
    133 
/*
 * Event flag bits.
 * NOTE(review): presumably stored in g_raid3_event.e_flags and passed as
 * 'flags' to g_raid3_event_send(); the exact meaning of each bit is defined
 * by g_raid3.c, not by this header -- confirm there.
 */
#define	G_RAID3_EVENT_DONTWAIT	0x1
#define	G_RAID3_EVENT_WAIT	0x2
#define	G_RAID3_EVENT_DEVICE	0x4
#define	G_RAID3_EVENT_DONE	0x8
/*
 * One queued event; linked into g_raid3_softc.sc_events through e_next.
 */
struct g_raid3_event {
	struct g_raid3_disk	*e_disk;	/* Disk the event refers to. */
	int			 e_state;	/* State carried by the event. */
	int			 e_flags;	/* G_RAID3_EVENT_* bits. */
	int			 e_error;	/* Error value for the event. */
	TAILQ_ENTRY(g_raid3_event) e_next;
};
    145 
/*
 * Additional device flag bits, placed in the high-order byte so they do not
 * collide with the G_RAID3_DEVICE_FLAG_* bits covered by
 * G_RAID3_DEVICE_FLAG_MASK.
 * NOTE(review): presumably run-time-only bits kept in sc_flags and never
 * written to metadata -- confirm against g_raid3.c.
 */
#define	G_RAID3_DEVICE_FLAG_DESTROY	0x0100000000000000ULL
#define	G_RAID3_DEVICE_FLAG_WAIT	0x0200000000000000ULL
#define	G_RAID3_DEVICE_FLAG_DESTROYING	0x0400000000000000ULL

/* Device states (NOTE(review): presumably the values of sc_state). */
#define	G_RAID3_DEVICE_STATE_STARTING		0
#define	G_RAID3_DEVICE_STATE_DEGRADED		1
#define	G_RAID3_DEVICE_STATE_COMPLETE		2

/*
 * ID bump requests; distinct bits, so both may be set together.
 * NOTE(review): presumably accumulated in sc_bump_id -- confirm.
 */
/* Bump syncid on first write. */
#define	G_RAID3_BUMP_SYNCID	0x1
/* Bump genid immediately. */
#define	G_RAID3_BUMP_GENID	0x2
    158 
    159 enum g_raid3_zones {
    160 	G_RAID3_ZONE_64K,
    161 	G_RAID3_ZONE_16K,
    162 	G_RAID3_ZONE_4K,
    163 	G_RAID3_NUM_ZONES
    164 };
    165 
    166 static __inline enum g_raid3_zones
    167 g_raid3_zone(size_t nbytes) {
    168 	if (nbytes > 65536)
    169 		return (G_RAID3_NUM_ZONES);
    170 	else if (nbytes > 16384)
    171 		return (G_RAID3_ZONE_64K);
    172 	else if (nbytes > 4096)
    173 		return (G_RAID3_ZONE_16K);
    174 	else
    175 		return (G_RAID3_ZONE_4K);
    176 };
    177 
/*
 * Per-device run-time state of one RAID3 device.
 */
struct g_raid3_softc {
	u_int		sc_state;	/* Device state. */
	uint64_t	sc_mediasize;	/* Device size. */
	uint32_t	sc_sectorsize;	/* Sector size. */
	uint64_t	sc_flags;	/* Additional flags. */

	struct g_geom	*sc_geom;
	struct g_provider *sc_provider;

	uint32_t	sc_id;		/* Device unique ID. */

	struct sx	 sc_lock;
	struct bio_queue_head sc_queue;
	struct mtx	 sc_queue_mtx;
	struct proc	*sc_worker;
	struct bio_queue_head sc_regular_delayed; /* Delayed I/O requests due
						     to collision with sync
						     requests. */
	struct bio_queue_head sc_inflight; /* In-flight regular write
					      requests. */
	struct bio_queue_head sc_sync_delayed; /* Delayed sync requests due
						  to collision with regular
						  requests. */

	struct g_raid3_disk *sc_disks;
	u_int		sc_ndisks;	/* Number of disks. */
	u_int		sc_round_robin;
	struct g_raid3_disk *sc_syncdisk;

	/* One entry per size class; indexed by enum g_raid3_zones. */
	struct g_raid3_zone {
		uma_zone_t	sz_zone;
		size_t		sz_inuse;
		size_t		sz_max;
		u_int		sz_requested;
		u_int		sz_failed;
	} sc_zones[G_RAID3_NUM_ZONES];

	u_int		sc_genid;	/* Generation ID. */
	u_int		sc_syncid;	/* Synchronization ID. */
	int		sc_bump_id;
	struct g_raid3_device_sync sc_sync;
	int		sc_idle;	/* DIRTY flags removed. */
	time_t		sc_last_write;
	u_int		sc_writes;
	u_int		sc_refcnt;	/* Number of softc references. */

	TAILQ_HEAD(, g_raid3_event) sc_events;
	struct mtx	sc_events_mtx;
	struct g_raid3_event *sc_timeout_event;

	struct callout	sc_callout;

	struct root_hold_token *sc_rootmount;
};
/*
 * Shorthand so that sc->sc_name reads the geom's name; only valid while
 * sc_geom is non-NULL.  As an object-like macro it rewrites every later
 * 'sc_name' token in files that include this header.
 */
#define	sc_name	sc_geom->name
    233 
/*
 * Kernel-side entry points; the definitions are not in this header.
 */
const char *g_raid3_get_diskname(struct g_raid3_disk *disk);
u_int g_raid3_ndisks(struct g_raid3_softc *sc, int state);
/* Values for the 'how' argument of g_raid3_destroy(). */
#define	G_RAID3_DESTROY_SOFT	0
#define	G_RAID3_DESTROY_DELAYED	1
#define	G_RAID3_DESTROY_HARD	2
int g_raid3_destroy(struct g_raid3_softc *sc, int how);
/*
 * NOTE(review): 'arg' is untyped; presumably a softc when
 * G_RAID3_EVENT_DEVICE is set in 'flags' and a disk otherwise -- confirm.
 */
int g_raid3_event_send(void *arg, int state, int flags);
/* Forward declaration: the full definition follows the _KERNEL section. */
struct g_raid3_metadata;
int g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
    struct g_raid3_metadata *md);
int g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md);
void g_raid3_fill_metadata(struct g_raid3_disk *disk,
    struct g_raid3_metadata *md);
int g_raid3_clear_metadata(struct g_raid3_disk *disk);
void g_raid3_update_metadata(struct g_raid3_disk *disk);

/* Declares the function g_raid3_config through the g_ctl_req_t typedef. */
g_ctl_req_t g_raid3_config;
    251 #endif	/* _KERNEL */
    252 
/*
 * In-memory form of the per-component metadata.
 *
 * This struct is never written out directly: raid3_metadata_encode()
 * serializes the fields in declaration order, little-endian and without
 * padding, into a 128-byte buffer whose last 16 bytes are the MD5 digest of
 * the first 112.  The compiler's layout of this struct need not match that
 * byte layout.  Older metadata versions omit md_genid (versions 0-2) and
 * md_provsize (versions 0-3); the decoders set the missing fields to 0.
 *
 * The char arrays are fixed 16-byte fields copied verbatim, so they are not
 * guaranteed to be NUL-terminated.
 */
struct g_raid3_metadata {
	char		md_magic[16];	/* Magic value. */
	uint32_t	md_version;	/* Version number. */
	char		md_name[16];	/* Device name. */
	uint32_t	md_id;		/* Device unique ID. */
	uint16_t	md_no;		/* Component number. */
	uint16_t	md_all;		/* Number of disks in device. */
	uint32_t	md_genid;	/* Generation ID. */
	uint32_t	md_syncid;	/* Synchronization ID. */
	uint64_t	md_mediasize;	/* Size of whole device. */
	uint32_t	md_sectorsize;	/* Sector size. */
	uint64_t	md_sync_offset;	/* Synchronized offset. */
	uint64_t	md_mflags;	/* Additional device flags. */
	uint64_t	md_dflags;	/* Additional disk flags. */
	char		md_provider[16]; /* Hardcoded provider. */
	uint64_t	md_provsize;	/* Provider's size. */
	u_char		md_hash[16];	/* MD5 hash. */
};
    271 static __inline void
    272 raid3_metadata_encode(struct g_raid3_metadata *md, u_char *data)
    273 {
    274 	MD5_CTX ctx;
    275 
    276 	bcopy(md->md_magic, data, 16);
    277 	le32enc(data + 16, md->md_version);
    278 	bcopy(md->md_name, data + 20, 16);
    279 	le32enc(data + 36, md->md_id);
    280 	le16enc(data + 40, md->md_no);
    281 	le16enc(data + 42, md->md_all);
    282 	le32enc(data + 44, md->md_genid);
    283 	le32enc(data + 48, md->md_syncid);
    284 	le64enc(data + 52, md->md_mediasize);
    285 	le32enc(data + 60, md->md_sectorsize);
    286 	le64enc(data + 64, md->md_sync_offset);
    287 	le64enc(data + 72, md->md_mflags);
    288 	le64enc(data + 80, md->md_dflags);
    289 	bcopy(md->md_provider, data + 88, 16);
    290 	le64enc(data + 104, md->md_provsize);
    291 	MD5Init(&ctx);
    292 	MD5Update(&ctx, data, 112);
    293 	MD5Final(md->md_hash, &ctx);
    294 	bcopy(md->md_hash, data + 112, 16);
    295 }
    296 static __inline int
    297 raid3_metadata_decode_v0v1v2(const u_char *data, struct g_raid3_metadata *md)
    298 {
    299 	MD5_CTX ctx;
    300 
    301 	bcopy(data + 20, md->md_name, 16);
    302 	md->md_id = le32dec(data + 36);
    303 	md->md_no = le16dec(data + 40);
    304 	md->md_all = le16dec(data + 42);
    305 	md->md_syncid = le32dec(data + 44);
    306 	md->md_mediasize = le64dec(data + 48);
    307 	md->md_sectorsize = le32dec(data + 56);
    308 	md->md_sync_offset = le64dec(data + 60);
    309 	md->md_mflags = le64dec(data + 68);
    310 	md->md_dflags = le64dec(data + 76);
    311 	bcopy(data + 84, md->md_provider, 16);
    312 	bcopy(data + 100, md->md_hash, 16);
    313 	MD5Init(&ctx);
    314 	MD5Update(&ctx, data, 100);
    315 	MD5Final(md->md_hash, &ctx);
    316 	if (bcmp(md->md_hash, data + 100, 16) != 0)
    317 		return (EINVAL);
    318 
    319 	/* New fields. */
    320 	md->md_genid = 0;
    321 	md->md_provsize = 0;
    322 
    323 	return (0);
    324 }
    325 static __inline int
    326 raid3_metadata_decode_v3(const u_char *data, struct g_raid3_metadata *md)
    327 {
    328 	MD5_CTX ctx;
    329 
    330 	bcopy(data + 20, md->md_name, 16);
    331 	md->md_id = le32dec(data + 36);
    332 	md->md_no = le16dec(data + 40);
    333 	md->md_all = le16dec(data + 42);
    334 	md->md_genid = le32dec(data + 44);
    335 	md->md_syncid = le32dec(data + 48);
    336 	md->md_mediasize = le64dec(data + 52);
    337 	md->md_sectorsize = le32dec(data + 60);
    338 	md->md_sync_offset = le64dec(data + 64);
    339 	md->md_mflags = le64dec(data + 72);
    340 	md->md_dflags = le64dec(data + 80);
    341 	bcopy(data + 88, md->md_provider, 16);
    342 	bcopy(data + 104, md->md_hash, 16);
    343 	MD5Init(&ctx);
    344 	MD5Update(&ctx, data, 104);
    345 	MD5Final(md->md_hash, &ctx);
    346 	if (bcmp(md->md_hash, data + 104, 16) != 0)
    347 		return (EINVAL);
    348 
    349 	/* New fields. */
    350 	md->md_provsize = 0;
    351 
    352 	return (0);
    353 }
    354 static __inline int
    355 raid3_metadata_decode_v4v5(const u_char *data, struct g_raid3_metadata *md)
    356 {
    357 	MD5_CTX ctx;
    358 
    359 	bcopy(data + 20, md->md_name, 16);
    360 	md->md_id = le32dec(data + 36);
    361 	md->md_no = le16dec(data + 40);
    362 	md->md_all = le16dec(data + 42);
    363 	md->md_genid = le32dec(data + 44);
    364 	md->md_syncid = le32dec(data + 48);
    365 	md->md_mediasize = le64dec(data + 52);
    366 	md->md_sectorsize = le32dec(data + 60);
    367 	md->md_sync_offset = le64dec(data + 64);
    368 	md->md_mflags = le64dec(data + 72);
    369 	md->md_dflags = le64dec(data + 80);
    370 	bcopy(data + 88, md->md_provider, 16);
    371 	md->md_provsize = le64dec(data + 104);
    372 	bcopy(data + 112, md->md_hash, 16);
    373 	MD5Init(&ctx);
    374 	MD5Update(&ctx, data, 112);
    375 	MD5Final(md->md_hash, &ctx);
    376 	if (bcmp(md->md_hash, data + 112, 16) != 0)
    377 		return (EINVAL);
    378 	return (0);
    379 }
    380 static __inline int
    381 raid3_metadata_decode(const u_char *data, struct g_raid3_metadata *md)
    382 {
    383 	int error;
    384 
    385 	bcopy(data, md->md_magic, 16);
    386 	md->md_version = le32dec(data + 16);
    387 	switch (md->md_version) {
    388 	case 0:
    389 	case 1:
    390 	case 2:
    391 		error = raid3_metadata_decode_v0v1v2(data, md);
    392 		break;
    393 	case 3:
    394 		error = raid3_metadata_decode_v3(data, md);
    395 		break;
    396 	case 4:
    397 	case 5:
    398 		error = raid3_metadata_decode_v4v5(data, md);
    399 		break;
    400 	default:
    401 		error = EINVAL;
    402 		break;
    403 	}
    404 	return (error);
    405 }
    406 
    407 static __inline void
    408 raid3_metadata_dump(const struct g_raid3_metadata *md)
    409 {
    410 	static const char hex[] = "0123456789abcdef";
    411 	char hash[16 * 2 + 1];
    412 	u_int i;
    413 
    414 	printf("     magic: %s\n", md->md_magic);
    415 	printf("   version: %u\n", (u_int)md->md_version);
    416 	printf("      name: %s\n", md->md_name);
    417 	printf("        id: %u\n", (u_int)md->md_id);
    418 	printf("        no: %u\n", (u_int)md->md_no);
    419 	printf("       all: %u\n", (u_int)md->md_all);
    420 	printf("     genid: %u\n", (u_int)md->md_genid);
    421 	printf("    syncid: %u\n", (u_int)md->md_syncid);
    422 	printf(" mediasize: %jd\n", (intmax_t)md->md_mediasize);
    423 	printf("sectorsize: %u\n", (u_int)md->md_sectorsize);
    424 	printf("syncoffset: %jd\n", (intmax_t)md->md_sync_offset);
    425 	printf("    mflags:");
    426 	if (md->md_mflags == 0)
    427 		printf(" NONE");
    428 	else {
    429 		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0)
    430 			printf(" NOAUTOSYNC");
    431 		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0)
    432 			printf(" ROUND-ROBIN");
    433 		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0)
    434 			printf(" VERIFY");
    435 		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
    436 			printf(" NOFAILSYNC");
    437 	}
    438 	printf("\n");
    439 	printf("    dflags:");
    440 	if (md->md_dflags == 0)
    441 		printf(" NONE");
    442 	else {
    443 		if ((md->md_dflags & G_RAID3_DISK_FLAG_DIRTY) != 0)
    444 			printf(" DIRTY");
    445 		if ((md->md_dflags & G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0)
    446 			printf(" SYNCHRONIZING");
    447 		if ((md->md_dflags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0)
    448 			printf(" FORCE_SYNC");
    449 	}
    450 	printf("\n");
    451 	printf("hcprovider: %s\n", md->md_provider);
    452 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
    453 	bzero(hash, sizeof(hash));
    454 	for (i = 0; i < 16; i++) {
    455 		hash[i * 2] = hex[md->md_hash[i] >> 4];
    456 		hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
    457 	}
    458 	printf("  MD5 hash: %s\n", hash);
    459 }
    460 #endif	/* !_G_RAID3_H_ */