File: /usr/src/linux/fs/block_dev.c

1     /*
2      *  linux/fs/block_dev.c
3      *
4      *  Copyright (C) 1991, 1992  Linus Torvalds
5      *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
6      */
7     
8     #include <linux/config.h>
9     #include <linux/init.h>
10     #include <linux/mm.h>
11     #include <linux/locks.h>
12     #include <linux/fcntl.h>
13     #include <linux/slab.h>
14     #include <linux/kmod.h>
15     #include <linux/major.h>
16     #include <linux/devfs_fs_kernel.h>
17     #include <linux/smp_lock.h>
18     #include <linux/iobuf.h>
19     #include <linux/highmem.h>
20     #include <linux/blkdev.h>
21     #include <linux/module.h>
22     
23     #include <asm/uaccess.h>
24     
25     static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
26     {
27     	int err;
28     
29     	err = -EIO;
30     	if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
31     		goto out;
32     
33     	bh_result->b_blocknr = iblock;
34     	bh_result->b_state |= 1UL << BH_Mapped;
35     	err = 0;
36     
37      out:
38     	return err;
39     }
40     
41     static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
42     {
43     	int i, nr_blocks, retval, dev = inode->i_rdev;
44     	unsigned long * blocks = iobuf->blocks;
45     
46     	if (blocksize != BUFFERED_BLOCKSIZE)
47     		BUG();
48     
49     	nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
50     	/* build the blocklist */
51     	for (i = 0; i < nr_blocks; i++, blocknr++) {
52     		struct buffer_head bh;
53     
54     		retval = blkdev_get_block(inode, blocknr, &bh);
55     		if (retval)
56     			goto out;
57     
58     		blocks[i] = bh.b_blocknr;
59     	}
60     
61     	retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
62     
63      out:
64     	return retval;
65     }
66     
67     static int blkdev_writepage(struct page * page)
68     {
69     	int err, i;
70     	unsigned long block;
71     	struct buffer_head *bh, *head;
72     	struct inode *inode = page->mapping->host;
73     
74     	if (!PageLocked(page))
75     		BUG();
76     
77     	if (!page->buffers)
78     		create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
79     	head = page->buffers;
80     
81     	block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
82     
83     	bh = head;
84     	i = 0;
85     
86     	/* Stage 1: make sure we have all the buffers mapped! */
87     	do {
88     		/*
89     		 * If the buffer isn't up-to-date, we can't be sure
90     		 * that the buffer has been initialized with the proper
91     		 * block number information etc..
92     		 *
93     		 * Leave it to the low-level FS to make all those
94     		 * decisions (block #0 may actually be a valid block)
95     		 */
96     		if (!buffer_mapped(bh)) {
97     			err = blkdev_get_block(inode, block, bh);
98     			if (err)
99     				goto out;
100     		}
101     		bh = bh->b_this_page;
102     		block++;
103     	} while (bh != head);
104     
105     	/* Stage 2: lock the buffers, mark them clean */
106     	do {
107     		lock_buffer(bh);
108     		set_buffer_async_io(bh);
109     		set_bit(BH_Uptodate, &bh->b_state);
110     		clear_bit(BH_Dirty, &bh->b_state);
111     		bh = bh->b_this_page;
112     	} while (bh != head);
113     
114     	/* Stage 3: submit the IO */
115     	do {
116     		submit_bh(WRITE, bh);
117     		bh = bh->b_this_page;
118     	} while (bh != head);
119     
120     	/* Done - end_buffer_io_async will unlock */
121     	SetPageUptodate(page);
122     	return 0;
123     
124     out:
125     	ClearPageUptodate(page);
126     	UnlockPage(page);
127     	return err;
128     }
129     
/*
 * Read one locked page of a block device.
 *
 * The page must be locked on entry (PAGE_BUG() otherwise).  Buffers are
 * attached to the page if it has none.  Each buffer that is not yet
 * uptodate is either mapped and queued for a READ, or zero-filled and
 * marked uptodate when it ends up unmapped.
 *
 * 'file' is not used.  Always returns 0.  When no buffer needs I/O the
 * page is marked uptodate and unlocked here; otherwise the reads are
 * submitted and unlocking is left to I/O completion (presumably
 * end_buffer_io_async - not visible in this function).
 */
static int blkdev_readpage(struct file * file, struct page * page)
{
	struct inode *inode = page->mapping->host;
	kdev_t dev = inode->i_rdev;
	unsigned long iblock, lblock;
	/* arr[] has room for every buffer a page can carry */
	struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
	unsigned int blocks;
	int nr, i;

	if (!PageLocked(page))
		PAGE_BUG(page);
	if (!page->buffers)
		create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
	head = page->buffers;

	/* NOTE: 'blocks' is computed here but never read afterwards. */
	blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
	/* first device block covered by this page */
	iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
	/* device size, scaled the same way blkdev_get_block() scales it */
	lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Stage one: collect the buffers that need I/O.  Every 'continue'
	 * below jumps to the loop condition, which is what advances i,
	 * iblock and bh.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			/*
			 * NOTE(review): blkdev_get_block() rejects
			 * iblock >= size, so with '<=' the block at
			 * iblock == lblock is skipped via the 'continue'
			 * below rather than zero-filled - confirm this is
			 * intended.
			 */
			if (iblock <= lblock) {
				if (blkdev_get_block(inode, iblock, bh))
					continue;
			}
			if (!buffer_mapped(bh)) {
				/* still unmapped: hand back zeroes for this block */
				memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
				flush_dcache_page(page);
				kunmap(page);
				set_bit(BH_Uptodate, &bh->b_state);
				continue;
			}
			/* get_block() might have updated the buffer synchronously */
			if (buffer_uptodate(bh))
				continue;
		}

		arr[nr] = bh;
		nr++;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (!nr) {
		/*
		 * Nothing was queued for I/O, so the page is marked
		 * uptodate and unlocked right away.  Note that nr is also
		 * zero when the only non-uptodate buffers were skipped
		 * because blkdev_get_block() failed.
		 */
		SetPageUptodate(page);
		UnlockPage(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		struct buffer_head * bh = arr[i];
		lock_buffer(bh);
		set_buffer_async_io(bh);
	}

	/* Stage 3: start the IO */
	for (i = 0; i < nr; i++)
		submit_bh(READ, arr[i]);

	return 0;
}
200     
/*
 * Prepare the buffers of 'page' for a write covering bytes [from, to).
 *
 * The page is kmap()ed on entry and is NOT unmapped here on any return
 * path; in this file blkdev_prepare_write() does the kunmap() on error
 * and blkdev_commit_write() does it after a successful write.
 *
 * Every buffer overlapping [from, to) is mapped.  A buffer that is only
 * partially overwritten and not yet uptodate is read in first so the
 * untouched bytes are valid.
 *
 * Returns 0 on success, the error from blkdev_get_block() if mapping
 * fails, or -EIO if a read that was waited for did not leave its buffer
 * uptodate.
 */
static int __blkdev_prepare_write(struct inode *inode, struct page *page,
				  unsigned from, unsigned to)
{
	kdev_t dev = inode->i_rdev;
	unsigned block_start, block_end;
	unsigned long block;
	int err = 0;
	/*
	 * wait[] holds the buffers we issued reads for.  Only partially
	 * covered buffers are read, and at most two can be partial: the
	 * one containing 'from' and the one containing 'to'.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
	kmap(page);

	if (!page->buffers)
		create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
	head = page->buffers;

	/* device block number of the first buffer in this page */
	block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);

	/*
	 * Walk the circular buffer list once.  '!block_start' lets the
	 * first iteration through even though bh == head at that point.
	 */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		if (!bh)
			BUG();
		block_end = block_start + BUFFERED_BLOCKSIZE;
		/* buffer lies entirely before the write range */
		if (block_end <= from)
			continue;
		/* buffer starts at or after the end of the range: done */
		if (block_start >= to)
			break;
		if (!buffer_mapped(bh)) {
			err = blkdev_get_block(inode, block, bh);
			if (err)
				goto out;
		}
		/* whole page already valid, so this buffer is too */
		if (Page_Uptodate(page)) {
			set_bit(BH_Uptodate, &bh->b_state);
			continue; 
		}
		/* partial overwrite of a stale buffer: read it in first */
		if (!buffer_uptodate(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		/* err is only returned if the check below fails */
		err = -EIO;
		if (!buffer_uptodate(*wait_bh))
			goto out;
	}
	return 0;
out:
	return err;
}
254     
255     static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
256     {
257     	struct inode *inode = page->mapping->host;
258     	int err = __blkdev_prepare_write(inode, page, from, to);
259     	if (err) {
260     		ClearPageUptodate(page);
261     		kunmap(page);
262     	}
263     	return err;
264     }
265     
266     static int __blkdev_commit_write(struct inode *inode, struct page *page,
267     				 unsigned from, unsigned to)
268     {
269     	unsigned block_start, block_end;
270     	int partial = 0, need_balance_dirty = 0;
271     	struct buffer_head *bh, *head;
272     
273     	for(bh = head = page->buffers, block_start = 0;
274     	    bh != head || !block_start;
275     	    block_start=block_end, bh = bh->b_this_page) {
276     		block_end = block_start + BUFFERED_BLOCKSIZE;
277     		if (block_end <= from || block_start >= to) {
278     			if (!buffer_uptodate(bh))
279     				partial = 1;
280     		} else {
281     			set_bit(BH_Uptodate, &bh->b_state);
282     			if (!atomic_set_buffer_dirty(bh)) {
283     				__mark_dirty(bh);
284     				buffer_insert_inode_data_queue(bh, inode);
285     				need_balance_dirty = 1;
286     			}
287     		}
288     	}
289     
290     	if (need_balance_dirty)
291     		balance_dirty();
292     	/*
293     	 * is this a partial write that happened to make all buffers
294     	 * uptodate then we can optimize away a bogus readpage() for
295     	 * the next read(). Here we 'discover' wether the page went
296     	 * uptodate as a result of this (potentially partial) write.
297     	 */
298     	if (!partial)
299     		SetPageUptodate(page);
300     	return 0;
301     }
302     
303     static int blkdev_commit_write(struct file *file, struct page *page,
304     			       unsigned from, unsigned to)
305     {
306     	struct inode *inode = page->mapping->host;
307     	__blkdev_commit_write(inode,page,from,to);
308     	kunmap(page);
309     	return 0;
310     }
311     
312     /*
313      * private llseek:
314      * for a block special file file->f_dentry->d_inode->i_size is zero
315      * so we compute the size by hand (just as in block_read/write above)
316      */
317     static loff_t block_llseek(struct file *file, loff_t offset, int origin)
318     {
319     	long long retval;
320     	kdev_t dev;
321     
322     	switch (origin) {
323     		case 2:
324     			dev = file->f_dentry->d_inode->i_rdev;
325     			if (blk_size[MAJOR(dev)])
326     				offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
327     			/* else?  return -EINVAL? */
328     			break;
329     		case 1:
330     			offset += file->f_pos;
331     	}
332     	retval = -EINVAL;
333     	if (offset >= 0) {
334     		if (offset != file->f_pos) {
335     			file->f_pos = offset;
336     			file->f_reada = 0;
337     			file->f_version = ++event;
338     		}
339     		retval = offset;
340     	}
341     	return retval;
342     }
343     	
344     
345     static int __block_fsync(struct inode * inode)
346     {
347     	int ret;
348     
349     	filemap_fdatasync(inode->i_mapping);
350     	ret = sync_buffers(inode->i_rdev, 1);
351     	filemap_fdatawait(inode->i_mapping);
352     
353     	return ret;
354     }
355     
356     /*
357      *	Filp may be NULL when we are called by an msync of a vma
358      *	since the vma has no handle.
359      */
360      
361     static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
362     {
363     	struct inode * inode = dentry->d_inode;
364     
365     	return __block_fsync(inode);
366     }
367     
368     /*
369      * pseudo-fs
370      */
371     
372     static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent)
373     {
374     	static struct super_operations sops = {};
375     	struct inode *root = new_inode(sb);
376     	if (!root)
377     		return NULL;
378     	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
379     	root->i_uid = root->i_gid = 0;
380     	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
381     	sb->s_blocksize = 1024;
382     	sb->s_blocksize_bits = 10;
383     	sb->s_magic = 0x62646576;
384     	sb->s_op = &sops;
385     	sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 });
386     	if (!sb->s_root) {
387     		iput(root);
388     		return NULL;
389     	}
390     	sb->s_root->d_sb = sb;
391     	sb->s_root->d_parent = sb->s_root;
392     	d_instantiate(sb->s_root, root);
393     	return sb;
394     }
395     
/*
 * Filesystem type descriptor for the "bdev" pseudo filesystem; its
 * superblocks are set up by bd_read_super() and it is declared with
 * the FS_NOMOUNT flag.
 */
static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT);

/*
 * Kernel-internal mount of bd_type, created in bdev_cache_init().
 * bdget() allocates its per-device inodes from bd_mnt->mnt_sb.
 */
static struct vfsmount *bd_mnt;
399     
400     /*
401      * bdev cache handling - shamelessly stolen from inode.c
402      * We use smaller hashtable, though.
403      */
404     
/* The bdev hash table has 2^HASH_BITS buckets. */
#define HASH_BITS	6
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
/* Buckets of struct block_device, chained through bd_hash. */
static struct list_head bdev_hashtable[HASH_SIZE];
/* Taken around hash table lookups/updates and inode<->bdev linkage. */
static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
/* Slab cache for struct block_device, created in bdev_cache_init(). */
static kmem_cache_t * bdev_cachep;

/* Allocate / free a struct block_device from bdev_cachep. */
#define alloc_bdev() \
	 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
415     
416     static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
417     {
418     	struct block_device * bdev = (struct block_device *) foo;
419     
420     	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
421     	    SLAB_CTOR_CONSTRUCTOR)
422     	{
423     		memset(bdev, 0, sizeof(*bdev));
424     		sema_init(&bdev->bd_sem, 1);
425     		INIT_LIST_HEAD(&bdev->bd_inodes);
426     	}
427     }
428     
429     void __init bdev_cache_init(void)
430     {
431     	int i, err;
432     	struct list_head *head = bdev_hashtable;
433     
434     	i = HASH_SIZE;
435     	do {
436     		INIT_LIST_HEAD(head);
437     		head++;
438     		i--;
439     	} while (i);
440     
441     	bdev_cachep = kmem_cache_create("bdev_cache",
442     					 sizeof(struct block_device),
443     					 0, SLAB_HWCACHE_ALIGN, init_once,
444     					 NULL);
445     	if (!bdev_cachep)
446     		panic("Cannot create bdev_cache SLAB cache");
447     	err = register_filesystem(&bd_type);
448     	if (err)
449     		panic("Cannot register bdev pseudo-fs");
450     	bd_mnt = kern_mount(&bd_type);
451     	err = PTR_ERR(bd_mnt);
452     	if (IS_ERR(bd_mnt))
453     		panic("Cannot create bdev pseudo-fs");
454     }
455     
456     /*
457      * Most likely _very_ bad one - but then it's hardly critical for small
458      * /dev and can be fixed when somebody will need really large one.
459      */
460     static inline unsigned long hash(dev_t dev)
461     {
462     	unsigned long tmp = dev;
463     	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
464     	return tmp & HASH_MASK;
465     }
466     
467     static struct block_device *bdfind(dev_t dev, struct list_head *head)
468     {
469     	struct list_head *p;
470     	struct block_device *bdev;
471     	for (p=head->next; p!=head; p=p->next) {
472     		bdev = list_entry(p, struct block_device, bd_hash);
473     		if (bdev->bd_dev != dev)
474     			continue;
475     		atomic_inc(&bdev->bd_count);
476     		return bdev;
477     	}
478     	return NULL;
479     }
480     
481     struct block_device *bdget(dev_t dev)
482     {
483     	struct list_head * head = bdev_hashtable + hash(dev);
484     	struct block_device *bdev, *new_bdev;
485     	spin_lock(&bdev_lock);
486     	bdev = bdfind(dev, head);
487     	spin_unlock(&bdev_lock);
488     	if (bdev)
489     		return bdev;
490     	new_bdev = alloc_bdev();
491     	if (new_bdev) {
492     		struct inode *inode = new_inode(bd_mnt->mnt_sb);
493     		if (inode) {
494     			atomic_set(&new_bdev->bd_count,1);
495     			new_bdev->bd_dev = dev;
496     			new_bdev->bd_op = NULL;
497     			new_bdev->bd_inode = inode;
498     			inode->i_rdev = to_kdev_t(dev);
499     			inode->i_bdev = new_bdev;
500     			inode->i_data.a_ops = &def_blk_aops;
501     			spin_lock(&bdev_lock);
502     			bdev = bdfind(dev, head);
503     			if (!bdev) {
504     				list_add(&new_bdev->bd_hash, head);
505     				spin_unlock(&bdev_lock);
506     				return new_bdev;
507     			}
508     			spin_unlock(&bdev_lock);
509     			iput(new_bdev->bd_inode);
510     		}
511     		destroy_bdev(new_bdev);
512     	}
513     	return bdev;
514     }
515     
/*
 * Detach 'inode' from its block_device: unlink it from the device's
 * inode list, clear i_bdev and point i_mapping back at the inode's own
 * i_data.  Both callers in this file (bdput, bd_forget) hold bdev_lock.
 */
static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}
522     
523     void bdput(struct block_device *bdev)
524     {
525     	if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
526     		struct list_head *p;
527     		if (bdev->bd_openers)
528     			BUG();
529     		list_del(&bdev->bd_hash);
530     		while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
531     			__bd_forget(list_entry(p, struct inode, i_devices));
532     		}
533     		spin_unlock(&bdev_lock);
534     		iput(bdev->bd_inode);
535     		destroy_bdev(bdev);
536     	}
537     }
538      
539     int bd_acquire(struct inode *inode)
540     {
541     	struct block_device *bdev;
542     	spin_lock(&bdev_lock);
543     	if (inode->i_bdev) {
544     		atomic_inc(&inode->i_bdev->bd_count);
545     		spin_unlock(&bdev_lock);
546     		return 0;
547     	}
548     	spin_unlock(&bdev_lock);
549     	bdev = bdget(kdev_t_to_nr(inode->i_rdev));
550     	if (!bdev)
551     		return -ENOMEM;
552     	spin_lock(&bdev_lock);
553     	if (!inode->i_bdev) {
554     		inode->i_bdev = bdev;
555     		inode->i_mapping = bdev->bd_inode->i_mapping;
556     		list_add(&inode->i_devices, &bdev->bd_inodes);
557     	} else if (inode->i_bdev != bdev)
558     		BUG();
559     	spin_unlock(&bdev_lock);
560     	return 0;
561     }
562     
/*
 * Call when you free inode.
 *
 * Detaches the inode from its block_device (if it has one) under
 * bdev_lock.
 */

void bd_forget(struct inode *inode)
{
	spin_lock(&bdev_lock);
	if (inode->i_bdev)
		__bd_forget(inode);
	spin_unlock(&bdev_lock);
}
572     
/*
 * Table of registered block drivers, indexed by major number.
 * An entry is in use when its bdops pointer is non-NULL; entries are
 * filled by register_blkdev() and cleared by unregister_blkdev().
 */
static struct {
	const char *name;	/* driver name as passed to register_blkdev() */
	struct block_device_operations *bdops;	/* driver operations, NULL if free */
} blkdevs[MAX_BLKDEV];
577     
578     int get_blkdev_list(char * p)
579     {
580     	int i;
581     	int len;
582     
583     	len = sprintf(p, "\nBlock devices:\n");
584     	for (i = 0; i < MAX_BLKDEV ; i++) {
585     		if (blkdevs[i].bdops) {
586     			len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
587     		}
588     	}
589     	return len;
590     }
591     
592     /*
593     	Return the function table of a device.
594     	Load the driver if needed.
595     */
596     const struct block_device_operations * get_blkfops(unsigned int major)
597     {
598     	const struct block_device_operations *ret = NULL;
599     
600     	/* major 0 is used for non-device mounts */
601     	if (major && major < MAX_BLKDEV) {
602     #ifdef CONFIG_KMOD
603     		if (!blkdevs[major].bdops) {
604     			char name[20];
605     			sprintf(name, "block-major-%d", major);
606     			request_module(name);
607     		}
608     #endif
609     		ret = blkdevs[major].bdops;
610     	}
611     	return ret;
612     }
613     
614     int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
615     {
616     	if (major == 0) {
617     		for (major = MAX_BLKDEV-1; major > 0; major--) {
618     			if (blkdevs[major].bdops == NULL) {
619     				blkdevs[major].name = name;
620     				blkdevs[major].bdops = bdops;
621     				return major;
622     			}
623     		}
624     		return -EBUSY;
625     	}
626     	if (major >= MAX_BLKDEV)
627     		return -EINVAL;
628     	if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
629     		return -EBUSY;
630     	blkdevs[major].name = name;
631     	blkdevs[major].bdops = bdops;
632     	return 0;
633     }
634     
635     int unregister_blkdev(unsigned int major, const char * name)
636     {
637     	if (major >= MAX_BLKDEV)
638     		return -EINVAL;
639     	if (!blkdevs[major].bdops)
640     		return -EINVAL;
641     	if (strcmp(blkdevs[major].name, name))
642     		return -EINVAL;
643     	blkdevs[major].name = NULL;
644     	blkdevs[major].bdops = NULL;
645     	return 0;
646     }
647     
648     /*
649      * This routine checks whether a removable media has been changed,
650      * and invalidates all buffer-cache-entries in that case. This
651      * is a relatively slow routine, so we have to try to minimize using
652      * it. Thus it is called only upon a 'mount' or 'open'. This
653      * is the best way of combining speed and utility, I think.
654      * People changing diskettes in the middle of an operation deserve
655      * to lose :-)
656      */
657     int check_disk_change(kdev_t dev)
658     {
659     	int i;
660     	const struct block_device_operations * bdops = NULL;
661     
662     	i = MAJOR(dev);
663     	if (i < MAX_BLKDEV)
664     		bdops = blkdevs[i].bdops;
665     	if (bdops == NULL) {
666     		devfs_handle_t de;
667     
668     		de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
669     					DEVFS_SPECIAL_BLK, 0);
670     		if (de) bdops = devfs_get_ops (de);
671     	}
672     	if (bdops == NULL)
673     		return 0;
674     	if (bdops->check_media_change == NULL)
675     		return 0;
676     	if (!bdops->check_media_change(dev))
677     		return 0;
678     
679     	printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
680     		bdevname(dev));
681     
682     	if (invalidate_device(dev, 0))
683     		printk("VFS: busy inodes on changed media.\n");
684     
685     	if (bdops->revalidate)
686     		bdops->revalidate(dev);
687     	return 1;
688     }
689     
690     int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
691     {
692     	int res;
693     	mm_segment_t old_fs = get_fs();
694     
695     	if (!bdev->bd_op->ioctl)
696     		return -EINVAL;
697     	set_fs(KERNEL_DS);
698     	res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg);
699     	set_fs(old_fs);
700     	return res;
701     }
702     
703     int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
704     {
705     	int ret = -ENODEV;
706     	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
707     	down(&bdev->bd_sem);
708     
709     	lock_kernel();
710     	if (!bdev->bd_op)
711     		bdev->bd_op = get_blkfops(MAJOR(rdev));
712     	if (bdev->bd_op) {
713     		/*
714     		 * This crockload is due to bad choice of ->open() type.
715     		 * It will go away.
716     		 * For now, block device ->open() routine must _not_
717     		 * examine anything in 'inode' argument except ->i_rdev.
718     		 */
719     		struct file fake_file = {};
720     		struct dentry fake_dentry = {};
721     		ret = -ENOMEM;
722     		fake_file.f_mode = mode;
723     		fake_file.f_flags = flags;
724     		fake_file.f_dentry = &fake_dentry;
725     		fake_dentry.d_inode = bdev->bd_inode;
726     		ret = 0;
727     		if (bdev->bd_op->open)
728     			ret = bdev->bd_op->open(bdev->bd_inode, &fake_file);
729     		if (!ret) {
730     			bdev->bd_openers++;
731     		} else if (!bdev->bd_openers)
732     			bdev->bd_op = NULL;
733     	}
734     	unlock_kernel();
735     	up(&bdev->bd_sem);
736     	if (ret)
737     		bdput(bdev);
738     	return ret;
739     }
740     
741     int blkdev_open(struct inode * inode, struct file * filp)
742     {
743     	int ret;
744     	struct block_device *bdev;
745     
746     	/*
747     	 * Preserve backwards compatibility and allow large file access
748     	 * even if userspace doesn't ask for it explicitly. Some mkfs
749     	 * binary needs it. We might want to drop this workaround
750     	 * during an unstable branch.
751     	 */
752     	filp->f_flags |= O_LARGEFILE;
753     
754     	bd_acquire(inode);
755     	bdev = inode->i_bdev;
756     	down(&bdev->bd_sem);
757     
758     	ret = -ENXIO;
759     	lock_kernel();
760     	if (!bdev->bd_op)
761     		bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
762     	if (bdev->bd_op) {
763     		ret = 0;
764     		if (bdev->bd_op->open)
765     			ret = bdev->bd_op->open(inode,filp);
766     		if (!ret)
767     			bdev->bd_openers++;
768     		else if (!bdev->bd_openers)
769     			bdev->bd_op = NULL;
770     	}	
771     	unlock_kernel();
772     	up(&bdev->bd_sem);
773     	if (ret)
774     		bdput(bdev);
775     	return ret;
776     }	
777     
/*
 * Release one open of a block device and drop the caller's reference.
 *
 * Runs under bdev->bd_sem and the big kernel lock.  What gets flushed
 * first depends on 'kind':
 *   BDEV_FILE - the device inode is fsynced; if a superblock exists for
 *               the device and it is read-only, its dcache, inodes and
 *               buffers are invalidated and update_buffers() is run
 *               under lock_super().
 *   BDEV_FS   - fsync_no_super() is called for the device.
 *   other     - nothing is flushed.
 * When the last opener goes away the device's page cache and buffers
 * are dropped and, after the driver's release(), the cached bd_op is
 * cleared.
 *
 * Returns the driver's release() result, or 0 if it has none.
 */
int blkdev_put(struct block_device *bdev, int kind)
{
	int ret = 0;
	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
	struct inode *bd_inode = bdev->bd_inode;

	down(&bdev->bd_sem);
	lock_kernel();
	if (kind == BDEV_FILE) {
		struct super_block * sb;

		__block_fsync(bd_inode);

		/* Janitorial note: this superblock special-casing must go away */
		sb = get_super(bd_inode->i_rdev);
		if (sb) {
			if (sb->s_flags & MS_RDONLY) {
				shrink_dcache_sb(sb);
				invalidate_inodes(sb);
				invalidate_buffers(bd_inode->i_rdev);
			}
			lock_super(sb);
			/* MS_RDONLY is tested again now that the superblock is locked */
			if (sb->s_flags & MS_RDONLY)
				update_buffers(bd_inode->i_rdev);
			unlock_super(sb);
			drop_super(sb);
		}
	} else if (kind == BDEV_FS)
		fsync_no_super(rdev);
	/* last opener gone: throw away cached pages and buffers */
	if (!--bdev->bd_openers) {
		truncate_inode_pages(bd_inode->i_mapping, 0);
		invalidate_buffers(rdev);
	}
	/* the driver is called with a NULL file pointer */
	if (bdev->bd_op->release)
		ret = bdev->bd_op->release(bd_inode, NULL);
	/* forget the driver only after release() has run */
	if (!bdev->bd_openers)
		bdev->bd_op = NULL;
	unlock_kernel();
	up(&bdev->bd_sem);
	bdput(bdev);
	return ret;
}
820     
821     int blkdev_close(struct inode * inode, struct file * filp)
822     {
823     	return blkdev_put(inode->i_bdev, BDEV_FILE);
824     }
825     
826     static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
827     			unsigned long arg)
828     {
829     	if (inode->i_bdev->bd_op->ioctl)
830     		return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
831     	return -EINVAL;
832     }
833     
/*
 * Address-space operations for block device page cache; bdget()
 * installs this table on each block device inode's i_data.
 * sync_page uses block_sync_page, which is defined outside this file;
 * the other methods are the blkdev_* functions in this file.
 */
struct address_space_operations def_blk_aops = {
	readpage: blkdev_readpage,
	writepage: blkdev_writepage,
	sync_page: block_sync_page,
	prepare_write: blkdev_prepare_write,
	commit_write: blkdev_commit_write,
	direct_IO: blkdev_direct_IO,
};
842     
/*
 * Default file operations for block special files.  open/release,
 * llseek, fsync and ioctl are implemented in this file; read, write
 * and mmap use the generic_file_* routines defined elsewhere.
 */
struct file_operations def_blk_fops = {
	open:		blkdev_open,
	release:	blkdev_close,
	llseek:		block_llseek,
	read:		generic_file_read,
	write:		generic_file_write,
	mmap:		generic_file_mmap,
	fsync:		block_fsync,
	ioctl:		blkdev_ioctl,
};
853     
854     const char * bdevname(kdev_t dev)
855     {
856     	static char buffer[32];
857     	const char * name = blkdevs[MAJOR(dev)].name;
858     
859     	if (!name)
860     		name = "unknown-block";
861     
862     	sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
863     	return buffer;
864     }
865