File: /usr/src/linux/drivers/md/raid0.c

1     /*
2        raid0.c : Multiple Devices driver for Linux
3                  Copyright (C) 1994-96 Marc ZYNGIER
4     	     <zyngier@ufr-info-p7.ibp.fr> or
5     	     <maz@gloups.fdn.fr>
6                  Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
7     
8     
9        RAID-0 management functions.
10     
11        This program is free software; you can redistribute it and/or modify
12        it under the terms of the GNU General Public License as published by
13        the Free Software Foundation; either version 2, or (at your option)
14        any later version.
15        
16        You should have received a copy of the GNU General Public License
17        (for example /usr/src/linux/COPYING); if not, write to the Free
18        Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
19     */
20     
21     #include <linux/module.h>
22     #include <linux/raid/raid0.h>
23     
24     #define MAJOR_NR MD_MAJOR
25     #define MD_DRIVER
26     #define MD_PERSONALITY
27     
28     static int create_strip_zones (mddev_t *mddev)
29     {
30     	int i, c, j, j1, j2;
31     	unsigned long current_offset, curr_zone_offset;
32     	raid0_conf_t *conf = mddev_to_conf(mddev);
33     	mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
34      
35     	/*
36     	 * The number of 'same size groups'
37     	 */
38     	conf->nr_strip_zones = 0;
39      
40     	ITERATE_RDEV_ORDERED(mddev,rdev1,j1) {
41     		printk("raid0: looking at %s\n", partition_name(rdev1->dev));
42     		c = 0;
43     		ITERATE_RDEV_ORDERED(mddev,rdev2,j2) {
44     			printk("raid0:   comparing %s(%ld) with %s(%ld)\n", partition_name(rdev1->dev), rdev1->size, partition_name(rdev2->dev), rdev2->size);
45     			if (rdev2 == rdev1) {
46     				printk("raid0:   END\n");
47     				break;
48     			}
49     			if (rdev2->size == rdev1->size)
50     			{
51     				/*
52     				 * Not unique, dont count it as a new
53     				 * group
54     				 */
55     				printk("raid0:   EQUAL\n");
56     				c = 1;
57     				break;
58     			}
59     			printk("raid0:   NOT EQUAL\n");
60     		}
61     		if (!c) {
62     			printk("raid0:   ==> UNIQUE\n");
63     			conf->nr_strip_zones++;
64     			printk("raid0: %d zones\n", conf->nr_strip_zones);
65     		}
66     	}
67     		printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
68     
69     	conf->strip_zone = vmalloc(sizeof(struct strip_zone)*
70     				conf->nr_strip_zones);
71     	if (!conf->strip_zone)
72     		return 1;
73     
74     
75     	conf->smallest = NULL;
76     	current_offset = 0;
77     	curr_zone_offset = 0;
78     
79     	for (i = 0; i < conf->nr_strip_zones; i++)
80     	{
81     		struct strip_zone *zone = conf->strip_zone + i;
82     
83     		printk("raid0: zone %d\n", i);
84     		zone->dev_offset = current_offset;
85     		smallest = NULL;
86     		c = 0;
87     
88     		ITERATE_RDEV_ORDERED(mddev,rdev,j) {
89     
90     			printk("raid0: checking %s ...", partition_name(rdev->dev));
91     			if (rdev->size > current_offset)
92     			{
93     				printk(" contained as device %d\n", c);
94     				zone->dev[c] = rdev;
95     				c++;
96     				if (!smallest || (rdev->size <smallest->size)) {
97     					smallest = rdev;
98     					printk("  (%ld) is smallest!.\n", rdev->size);
99     				}
100     			} else
101     				printk(" nope.\n");
102     		}
103     
104     		zone->nb_dev = c;
105     		zone->size = (smallest->size - current_offset) * c;
106     		printk("raid0: zone->nb_dev: %d, size: %ld\n",zone->nb_dev,zone->size);
107     
108     		if (!conf->smallest || (zone->size < conf->smallest->size))
109     			conf->smallest = zone;
110     
111     		zone->zone_offset = curr_zone_offset;
112     		curr_zone_offset += zone->size;
113     
114     		current_offset = smallest->size;
115     		printk("raid0: current zone offset: %ld\n", current_offset);
116     	}
117     	printk("raid0: done.\n");
118     	return 0;
119     }
120     
121     static int raid0_run (mddev_t *mddev)
122     {
123     	unsigned long cur=0, i=0, size, zone0_size, nb_zone;
124     	raid0_conf_t *conf;
125     
126     	MOD_INC_USE_COUNT;
127     
128     	conf = vmalloc(sizeof (raid0_conf_t));
129     	if (!conf)
130     		goto out;
131     	mddev->private = (void *)conf;
132      
133     	if (md_check_ordering(mddev)) {
134     		printk("raid0: disks are not ordered, aborting!\n");
135     		goto out_free_conf;
136     	}
137     
138     	if (create_strip_zones (mddev)) 
139     		goto out_free_conf;
140     
141     	printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]);
142     	printk("raid0 : conf->smallest->size is %ld blocks.\n", conf->smallest->size);
143     	nb_zone = md_size[mdidx(mddev)]/conf->smallest->size +
144     			(md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0);
145     	printk("raid0 : nb_zone is %ld.\n", nb_zone);
146     	conf->nr_zones = nb_zone;
147     
148     	printk("raid0 : Allocating %ld bytes for hash.\n",
149     				nb_zone*sizeof(struct raid0_hash));
150     
151     	conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone);
152     	if (!conf->hash_table)
153     		goto out_free_zone_conf;
154     	size = conf->strip_zone[cur].size;
155     
156     	i = 0;
157     	while (cur < conf->nr_strip_zones) {
158     		conf->hash_table[i].zone0 = conf->strip_zone + cur;
159     
160     		/*
161     		 * If we completely fill the slot
162     		 */
163     		if (size >= conf->smallest->size) {
164     			conf->hash_table[i++].zone1 = NULL;
165     			size -= conf->smallest->size;
166     
167     			if (!size) {
168     				if (++cur == conf->nr_strip_zones)
169     					continue;
170     				size = conf->strip_zone[cur].size;
171     			}
172     			continue;
173     		}
174     		if (++cur == conf->nr_strip_zones) {
175     			/*
176     			 * Last dev, set unit1 as NULL
177     			 */
178     			conf->hash_table[i].zone1=NULL;
179     			continue;
180     		}
181     
182     		/*
183     		 * Here we use a 2nd dev to fill the slot
184     		 */
185     		zone0_size = size;
186     		size = conf->strip_zone[cur].size;
187     		conf->hash_table[i++].zone1 = conf->strip_zone + cur;
188     		size -= (conf->smallest->size - zone0_size);
189     	}
190     	return 0;
191     
192     out_free_zone_conf:
193     	vfree(conf->strip_zone);
194     	conf->strip_zone = NULL;
195     
196     out_free_conf:
197     	vfree(conf);
198     	mddev->private = NULL;
199     out:
200     	MOD_DEC_USE_COUNT;
201     	return 1;
202     }
203     
204     static int raid0_stop (mddev_t *mddev)
205     {
206     	raid0_conf_t *conf = mddev_to_conf(mddev);
207     
208     	vfree (conf->hash_table);
209     	conf->hash_table = NULL;
210     	vfree (conf->strip_zone);
211     	conf->strip_zone = NULL;
212     	vfree (conf);
213     	mddev->private = NULL;
214     
215     	MOD_DEC_USE_COUNT;
216     	return 0;
217     }
218     
219     /*
220      * FIXME - We assume some things here :
221      * - requested buffers NEVER bigger than chunk size,
222      * - requested buffers NEVER cross stripes limits.
223      * Of course, those facts may not be valid anymore (and surely won't...)
224      * Hey guys, there's some work out there ;-)
225      */
226     static int raid0_make_request (mddev_t *mddev,
227     			       int rw, struct buffer_head * bh)
228     {
229     	unsigned int sect_in_chunk, chunksize_bits,  chunk_size;
230     	raid0_conf_t *conf = mddev_to_conf(mddev);
231     	struct raid0_hash *hash;
232     	struct strip_zone *zone;
233     	mdk_rdev_t *tmp_dev;
234     	unsigned long chunk, block, rsect;
235     
236     	chunk_size = mddev->param.chunk_size >> 10;
237     	chunksize_bits = ffz(~chunk_size);
238     	block = bh->b_rsector >> 1;
239     	hash = conf->hash_table + block / conf->smallest->size;
240     
241     	/* Sanity check */
242     	if (chunk_size < (block % chunk_size) + (bh->b_size >> 10))
243     		goto bad_map;
244      
245     	if (!hash)
246     		goto bad_hash;
247     
248     	if (!hash->zone0)
249     		goto bad_zone0;
250      
251     	if (block >= (hash->zone0->size + hash->zone0->zone_offset)) {
252     		if (!hash->zone1)
253     			goto bad_zone1;
254     		zone = hash->zone1;
255     	} else
256     		zone = hash->zone0;
257         
258     	sect_in_chunk = bh->b_rsector & ((chunk_size<<1) -1);
259     	chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits);
260     	tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev];
261     	rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1)
262     		+ sect_in_chunk;
263      
264     	/*
265     	 * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
266     	 * is the only IO operation happening on this bh.
267     	 */
268     	bh->b_rdev = tmp_dev->dev;
269     	bh->b_rsector = rsect;
270     
271     	/*
272     	 * Let the main block layer submit the IO and resolve recursion:
273     	 */
274     	return 1;
275     
276     bad_map:
277     	printk ("raid0_make_request bug: can't convert block across chunks or bigger than %dk %ld %d\n", chunk_size, bh->b_rsector, bh->b_size >> 10);
278     	goto outerr;
279     bad_hash:
280     	printk("raid0_make_request bug: hash==NULL for block %ld\n", block);
281     	goto outerr;
282     bad_zone0:
283     	printk ("raid0_make_request bug: hash->zone0==NULL for block %ld\n", block);
284     	goto outerr;
285     bad_zone1:
286     	printk ("raid0_make_request bug: hash->zone1==NULL for block %ld\n", block);
287      outerr:
288     	buffer_IO_error(bh);
289     	return 0;
290     }
291     			   
292     static int raid0_status (char *page, mddev_t *mddev)
293     {
294     	int sz = 0;
295     #undef MD_DEBUG
296     #ifdef MD_DEBUG
297     	int j, k;
298     	raid0_conf_t *conf = mddev_to_conf(mddev);
299       
300     	sz += sprintf(page + sz, "      ");
301     	for (j = 0; j < conf->nr_zones; j++) {
302     		sz += sprintf(page + sz, "[z%d",
303     				conf->hash_table[j].zone0 - conf->strip_zone);
304     		if (conf->hash_table[j].zone1)
305     			sz += sprintf(page+sz, "/z%d] ",
306     				conf->hash_table[j].zone1 - conf->strip_zone);
307     		else
308     			sz += sprintf(page+sz, "] ");
309     	}
310       
311     	sz += sprintf(page + sz, "\n");
312       
313     	for (j = 0; j < conf->nr_strip_zones; j++) {
314     		sz += sprintf(page + sz, "      z%d=[", j);
315     		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
316     			sz += sprintf (page+sz, "%s/", partition_name(
317     				conf->strip_zone[j].dev[k]->dev));
318     		sz--;
319     		sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n",
320     				conf->strip_zone[j].zone_offset,
321     				conf->strip_zone[j].dev_offset,
322     				conf->strip_zone[j].size);
323     	}
324     #endif
325     	sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024);
326     	return sz;
327     }
328     
329     static mdk_personality_t raid0_personality=
330     {
331     	name:		"raid0",
332     	make_request:	raid0_make_request,
333     	run:		raid0_run,
334     	stop:		raid0_stop,
335     	status:		raid0_status,
336     };
337     
338     static int md__init raid0_init (void)
339     {
340     	return register_md_personality (RAID0, &raid0_personality);
341     }
342     
343     static void raid0_exit (void)
344     {
345     	unregister_md_personality (RAID0);
346     }
347     
348     module_init(raid0_init);
349     module_exit(raid0_exit);
350     
351     
352