File: /usr/src/linux/fs/block_dev.c
1 /*
2 * linux/fs/block_dev.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
6 */
7
8 #include <linux/config.h>
9 #include <linux/init.h>
10 #include <linux/mm.h>
11 #include <linux/locks.h>
12 #include <linux/fcntl.h>
13 #include <linux/slab.h>
14 #include <linux/kmod.h>
15 #include <linux/major.h>
16 #include <linux/devfs_fs_kernel.h>
17 #include <linux/smp_lock.h>
18 #include <linux/iobuf.h>
19 #include <linux/highmem.h>
20 #include <linux/blkdev.h>
21 #include <linux/module.h>
22
23 #include <asm/uaccess.h>
24
25 static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
26 {
27 int err;
28
29 err = -EIO;
30 if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
31 goto out;
32
33 bh_result->b_blocknr = iblock;
34 bh_result->b_state |= 1UL << BH_Mapped;
35 err = 0;
36
37 out:
38 return err;
39 }
40
41 static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
42 {
43 int i, nr_blocks, retval, dev = inode->i_rdev;
44 unsigned long * blocks = iobuf->blocks;
45
46 if (blocksize != BUFFERED_BLOCKSIZE)
47 BUG();
48
49 nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
50 /* build the blocklist */
51 for (i = 0; i < nr_blocks; i++, blocknr++) {
52 struct buffer_head bh;
53
54 retval = blkdev_get_block(inode, blocknr, &bh);
55 if (retval)
56 goto out;
57
58 blocks[i] = bh.b_blocknr;
59 }
60
61 retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
62
63 out:
64 return retval;
65 }
66
67 static int blkdev_writepage(struct page * page)
68 {
69 int err, i;
70 unsigned long block;
71 struct buffer_head *bh, *head;
72 struct inode *inode = page->mapping->host;
73
74 if (!PageLocked(page))
75 BUG();
76
77 if (!page->buffers)
78 create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
79 head = page->buffers;
80
81 block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
82
83 bh = head;
84 i = 0;
85
86 /* Stage 1: make sure we have all the buffers mapped! */
87 do {
88 /*
89 * If the buffer isn't up-to-date, we can't be sure
90 * that the buffer has been initialized with the proper
91 * block number information etc..
92 *
93 * Leave it to the low-level FS to make all those
94 * decisions (block #0 may actually be a valid block)
95 */
96 if (!buffer_mapped(bh)) {
97 err = blkdev_get_block(inode, block, bh);
98 if (err)
99 goto out;
100 }
101 bh = bh->b_this_page;
102 block++;
103 } while (bh != head);
104
105 /* Stage 2: lock the buffers, mark them clean */
106 do {
107 lock_buffer(bh);
108 set_buffer_async_io(bh);
109 set_bit(BH_Uptodate, &bh->b_state);
110 clear_bit(BH_Dirty, &bh->b_state);
111 bh = bh->b_this_page;
112 } while (bh != head);
113
114 /* Stage 3: submit the IO */
115 do {
116 submit_bh(WRITE, bh);
117 bh = bh->b_this_page;
118 } while (bh != head);
119
120 /* Done - end_buffer_io_async will unlock */
121 SetPageUptodate(page);
122 return 0;
123
124 out:
125 ClearPageUptodate(page);
126 UnlockPage(page);
127 return err;
128 }
129
130 static int blkdev_readpage(struct file * file, struct page * page)
131 {
132 struct inode *inode = page->mapping->host;
133 kdev_t dev = inode->i_rdev;
134 unsigned long iblock, lblock;
135 struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
136 unsigned int blocks;
137 int nr, i;
138
139 if (!PageLocked(page))
140 PAGE_BUG(page);
141 if (!page->buffers)
142 create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
143 head = page->buffers;
144
145 blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
146 iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
147 lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
148 bh = head;
149 nr = 0;
150 i = 0;
151
152 do {
153 if (buffer_uptodate(bh))
154 continue;
155
156 if (!buffer_mapped(bh)) {
157 if (iblock <= lblock) {
158 if (blkdev_get_block(inode, iblock, bh))
159 continue;
160 }
161 if (!buffer_mapped(bh)) {
162 memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
163 flush_dcache_page(page);
164 kunmap(page);
165 set_bit(BH_Uptodate, &bh->b_state);
166 continue;
167 }
168 /* get_block() might have updated the buffer synchronously */
169 if (buffer_uptodate(bh))
170 continue;
171 }
172
173 arr[nr] = bh;
174 nr++;
175 } while (i++, iblock++, (bh = bh->b_this_page) != head);
176
177 if (!nr) {
178 /*
179 * all buffers are uptodate - we can set the page
180 * uptodate as well.
181 */
182 SetPageUptodate(page);
183 UnlockPage(page);
184 return 0;
185 }
186
187 /* Stage two: lock the buffers */
188 for (i = 0; i < nr; i++) {
189 struct buffer_head * bh = arr[i];
190 lock_buffer(bh);
191 set_buffer_async_io(bh);
192 }
193
194 /* Stage 3: start the IO */
195 for (i = 0; i < nr; i++)
196 submit_bh(READ, arr[i]);
197
198 return 0;
199 }
200
201 static int __blkdev_prepare_write(struct inode *inode, struct page *page,
202 unsigned from, unsigned to)
203 {
204 kdev_t dev = inode->i_rdev;
205 unsigned block_start, block_end;
206 unsigned long block;
207 int err = 0;
208 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
209 kmap(page);
210
211 if (!page->buffers)
212 create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
213 head = page->buffers;
214
215 block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
216
217 for(bh = head, block_start = 0; bh != head || !block_start;
218 block++, block_start=block_end, bh = bh->b_this_page) {
219 if (!bh)
220 BUG();
221 block_end = block_start + BUFFERED_BLOCKSIZE;
222 if (block_end <= from)
223 continue;
224 if (block_start >= to)
225 break;
226 if (!buffer_mapped(bh)) {
227 err = blkdev_get_block(inode, block, bh);
228 if (err)
229 goto out;
230 }
231 if (Page_Uptodate(page)) {
232 set_bit(BH_Uptodate, &bh->b_state);
233 continue;
234 }
235 if (!buffer_uptodate(bh) &&
236 (block_start < from || block_end > to)) {
237 ll_rw_block(READ, 1, &bh);
238 *wait_bh++=bh;
239 }
240 }
241 /*
242 * If we issued read requests - let them complete.
243 */
244 while(wait_bh > wait) {
245 wait_on_buffer(*--wait_bh);
246 err = -EIO;
247 if (!buffer_uptodate(*wait_bh))
248 goto out;
249 }
250 return 0;
251 out:
252 return err;
253 }
254
255 static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
256 {
257 struct inode *inode = page->mapping->host;
258 int err = __blkdev_prepare_write(inode, page, from, to);
259 if (err) {
260 ClearPageUptodate(page);
261 kunmap(page);
262 }
263 return err;
264 }
265
266 static int __blkdev_commit_write(struct inode *inode, struct page *page,
267 unsigned from, unsigned to)
268 {
269 unsigned block_start, block_end;
270 int partial = 0, need_balance_dirty = 0;
271 struct buffer_head *bh, *head;
272
273 for(bh = head = page->buffers, block_start = 0;
274 bh != head || !block_start;
275 block_start=block_end, bh = bh->b_this_page) {
276 block_end = block_start + BUFFERED_BLOCKSIZE;
277 if (block_end <= from || block_start >= to) {
278 if (!buffer_uptodate(bh))
279 partial = 1;
280 } else {
281 set_bit(BH_Uptodate, &bh->b_state);
282 if (!atomic_set_buffer_dirty(bh)) {
283 __mark_dirty(bh);
284 buffer_insert_inode_data_queue(bh, inode);
285 need_balance_dirty = 1;
286 }
287 }
288 }
289
290 if (need_balance_dirty)
291 balance_dirty();
292 /*
293 * is this a partial write that happened to make all buffers
294 * uptodate then we can optimize away a bogus readpage() for
295 * the next read(). Here we 'discover' wether the page went
296 * uptodate as a result of this (potentially partial) write.
297 */
298 if (!partial)
299 SetPageUptodate(page);
300 return 0;
301 }
302
303 static int blkdev_commit_write(struct file *file, struct page *page,
304 unsigned from, unsigned to)
305 {
306 struct inode *inode = page->mapping->host;
307 __blkdev_commit_write(inode,page,from,to);
308 kunmap(page);
309 return 0;
310 }
311
312 /*
313 * private llseek:
314 * for a block special file file->f_dentry->d_inode->i_size is zero
315 * so we compute the size by hand (just as in block_read/write above)
316 */
317 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
318 {
319 long long retval;
320 kdev_t dev;
321
322 switch (origin) {
323 case 2:
324 dev = file->f_dentry->d_inode->i_rdev;
325 if (blk_size[MAJOR(dev)])
326 offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
327 /* else? return -EINVAL? */
328 break;
329 case 1:
330 offset += file->f_pos;
331 }
332 retval = -EINVAL;
333 if (offset >= 0) {
334 if (offset != file->f_pos) {
335 file->f_pos = offset;
336 file->f_reada = 0;
337 file->f_version = ++event;
338 }
339 retval = offset;
340 }
341 return retval;
342 }
343
344
345 static int __block_fsync(struct inode * inode)
346 {
347 int ret;
348
349 filemap_fdatasync(inode->i_mapping);
350 ret = sync_buffers(inode->i_rdev, 1);
351 filemap_fdatawait(inode->i_mapping);
352
353 return ret;
354 }
355
356 /*
357 * Filp may be NULL when we are called by an msync of a vma
358 * since the vma has no handle.
359 */
360
361 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
362 {
363 struct inode * inode = dentry->d_inode;
364
365 return __block_fsync(inode);
366 }
367
368 /*
369 * pseudo-fs
370 */
371
372 static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent)
373 {
374 static struct super_operations sops = {};
375 struct inode *root = new_inode(sb);
376 if (!root)
377 return NULL;
378 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
379 root->i_uid = root->i_gid = 0;
380 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
381 sb->s_blocksize = 1024;
382 sb->s_blocksize_bits = 10;
383 sb->s_magic = 0x62646576;
384 sb->s_op = &sops;
385 sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 });
386 if (!sb->s_root) {
387 iput(root);
388 return NULL;
389 }
390 sb->s_root->d_sb = sb;
391 sb->s_root->d_parent = sb->s_root;
392 d_instantiate(sb->s_root, root);
393 return sb;
394 }
395
396 static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT);
397
398 static struct vfsmount *bd_mnt;
399
400 /*
401 * bdev cache handling - shamelessly stolen from inode.c
402 * We use smaller hashtable, though.
403 */
404
405 #define HASH_BITS 6
406 #define HASH_SIZE (1UL << HASH_BITS)
407 #define HASH_MASK (HASH_SIZE-1)
408 static struct list_head bdev_hashtable[HASH_SIZE];
409 static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
410 static kmem_cache_t * bdev_cachep;
411
412 #define alloc_bdev() \
413 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
414 #define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
415
416 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
417 {
418 struct block_device * bdev = (struct block_device *) foo;
419
420 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
421 SLAB_CTOR_CONSTRUCTOR)
422 {
423 memset(bdev, 0, sizeof(*bdev));
424 sema_init(&bdev->bd_sem, 1);
425 INIT_LIST_HEAD(&bdev->bd_inodes);
426 }
427 }
428
429 void __init bdev_cache_init(void)
430 {
431 int i, err;
432 struct list_head *head = bdev_hashtable;
433
434 i = HASH_SIZE;
435 do {
436 INIT_LIST_HEAD(head);
437 head++;
438 i--;
439 } while (i);
440
441 bdev_cachep = kmem_cache_create("bdev_cache",
442 sizeof(struct block_device),
443 0, SLAB_HWCACHE_ALIGN, init_once,
444 NULL);
445 if (!bdev_cachep)
446 panic("Cannot create bdev_cache SLAB cache");
447 err = register_filesystem(&bd_type);
448 if (err)
449 panic("Cannot register bdev pseudo-fs");
450 bd_mnt = kern_mount(&bd_type);
451 err = PTR_ERR(bd_mnt);
452 if (IS_ERR(bd_mnt))
453 panic("Cannot create bdev pseudo-fs");
454 }
455
456 /*
457 * Most likely _very_ bad one - but then it's hardly critical for small
458 * /dev and can be fixed when somebody will need really large one.
459 */
460 static inline unsigned long hash(dev_t dev)
461 {
462 unsigned long tmp = dev;
463 tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
464 return tmp & HASH_MASK;
465 }
466
467 static struct block_device *bdfind(dev_t dev, struct list_head *head)
468 {
469 struct list_head *p;
470 struct block_device *bdev;
471 for (p=head->next; p!=head; p=p->next) {
472 bdev = list_entry(p, struct block_device, bd_hash);
473 if (bdev->bd_dev != dev)
474 continue;
475 atomic_inc(&bdev->bd_count);
476 return bdev;
477 }
478 return NULL;
479 }
480
481 struct block_device *bdget(dev_t dev)
482 {
483 struct list_head * head = bdev_hashtable + hash(dev);
484 struct block_device *bdev, *new_bdev;
485 spin_lock(&bdev_lock);
486 bdev = bdfind(dev, head);
487 spin_unlock(&bdev_lock);
488 if (bdev)
489 return bdev;
490 new_bdev = alloc_bdev();
491 if (new_bdev) {
492 struct inode *inode = new_inode(bd_mnt->mnt_sb);
493 if (inode) {
494 atomic_set(&new_bdev->bd_count,1);
495 new_bdev->bd_dev = dev;
496 new_bdev->bd_op = NULL;
497 new_bdev->bd_inode = inode;
498 inode->i_rdev = to_kdev_t(dev);
499 inode->i_bdev = new_bdev;
500 inode->i_data.a_ops = &def_blk_aops;
501 spin_lock(&bdev_lock);
502 bdev = bdfind(dev, head);
503 if (!bdev) {
504 list_add(&new_bdev->bd_hash, head);
505 spin_unlock(&bdev_lock);
506 return new_bdev;
507 }
508 spin_unlock(&bdev_lock);
509 iput(new_bdev->bd_inode);
510 }
511 destroy_bdev(new_bdev);
512 }
513 return bdev;
514 }
515
516 static inline void __bd_forget(struct inode *inode)
517 {
518 list_del_init(&inode->i_devices);
519 inode->i_bdev = NULL;
520 inode->i_mapping = &inode->i_data;
521 }
522
523 void bdput(struct block_device *bdev)
524 {
525 if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
526 struct list_head *p;
527 if (bdev->bd_openers)
528 BUG();
529 list_del(&bdev->bd_hash);
530 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
531 __bd_forget(list_entry(p, struct inode, i_devices));
532 }
533 spin_unlock(&bdev_lock);
534 iput(bdev->bd_inode);
535 destroy_bdev(bdev);
536 }
537 }
538
539 int bd_acquire(struct inode *inode)
540 {
541 struct block_device *bdev;
542 spin_lock(&bdev_lock);
543 if (inode->i_bdev) {
544 atomic_inc(&inode->i_bdev->bd_count);
545 spin_unlock(&bdev_lock);
546 return 0;
547 }
548 spin_unlock(&bdev_lock);
549 bdev = bdget(kdev_t_to_nr(inode->i_rdev));
550 if (!bdev)
551 return -ENOMEM;
552 spin_lock(&bdev_lock);
553 if (!inode->i_bdev) {
554 inode->i_bdev = bdev;
555 inode->i_mapping = bdev->bd_inode->i_mapping;
556 list_add(&inode->i_devices, &bdev->bd_inodes);
557 } else if (inode->i_bdev != bdev)
558 BUG();
559 spin_unlock(&bdev_lock);
560 return 0;
561 }
562
563 /* Call when you free inode */
564
565 void bd_forget(struct inode *inode)
566 {
567 spin_lock(&bdev_lock);
568 if (inode->i_bdev)
569 __bd_forget(inode);
570 spin_unlock(&bdev_lock);
571 }
572
/*
 * Table of registered block drivers, indexed by major number.  A slot
 * counts as in use when its bdops pointer is non-NULL.
 */
static struct {
	const char *name;			/* name passed to register_blkdev() */
	struct block_device_operations *bdops;	/* driver operations; NULL if free */
} blkdevs[MAX_BLKDEV];
577
578 int get_blkdev_list(char * p)
579 {
580 int i;
581 int len;
582
583 len = sprintf(p, "\nBlock devices:\n");
584 for (i = 0; i < MAX_BLKDEV ; i++) {
585 if (blkdevs[i].bdops) {
586 len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
587 }
588 }
589 return len;
590 }
591
592 /*
593 Return the function table of a device.
594 Load the driver if needed.
595 */
596 const struct block_device_operations * get_blkfops(unsigned int major)
597 {
598 const struct block_device_operations *ret = NULL;
599
600 /* major 0 is used for non-device mounts */
601 if (major && major < MAX_BLKDEV) {
602 #ifdef CONFIG_KMOD
603 if (!blkdevs[major].bdops) {
604 char name[20];
605 sprintf(name, "block-major-%d", major);
606 request_module(name);
607 }
608 #endif
609 ret = blkdevs[major].bdops;
610 }
611 return ret;
612 }
613
614 int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
615 {
616 if (major == 0) {
617 for (major = MAX_BLKDEV-1; major > 0; major--) {
618 if (blkdevs[major].bdops == NULL) {
619 blkdevs[major].name = name;
620 blkdevs[major].bdops = bdops;
621 return major;
622 }
623 }
624 return -EBUSY;
625 }
626 if (major >= MAX_BLKDEV)
627 return -EINVAL;
628 if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
629 return -EBUSY;
630 blkdevs[major].name = name;
631 blkdevs[major].bdops = bdops;
632 return 0;
633 }
634
635 int unregister_blkdev(unsigned int major, const char * name)
636 {
637 if (major >= MAX_BLKDEV)
638 return -EINVAL;
639 if (!blkdevs[major].bdops)
640 return -EINVAL;
641 if (strcmp(blkdevs[major].name, name))
642 return -EINVAL;
643 blkdevs[major].name = NULL;
644 blkdevs[major].bdops = NULL;
645 return 0;
646 }
647
648 /*
649 * This routine checks whether a removable media has been changed,
650 * and invalidates all buffer-cache-entries in that case. This
651 * is a relatively slow routine, so we have to try to minimize using
652 * it. Thus it is called only upon a 'mount' or 'open'. This
653 * is the best way of combining speed and utility, I think.
654 * People changing diskettes in the middle of an operation deserve
655 * to lose :-)
656 */
657 int check_disk_change(kdev_t dev)
658 {
659 int i;
660 const struct block_device_operations * bdops = NULL;
661
662 i = MAJOR(dev);
663 if (i < MAX_BLKDEV)
664 bdops = blkdevs[i].bdops;
665 if (bdops == NULL) {
666 devfs_handle_t de;
667
668 de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
669 DEVFS_SPECIAL_BLK, 0);
670 if (de) bdops = devfs_get_ops (de);
671 }
672 if (bdops == NULL)
673 return 0;
674 if (bdops->check_media_change == NULL)
675 return 0;
676 if (!bdops->check_media_change(dev))
677 return 0;
678
679 printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
680 bdevname(dev));
681
682 if (invalidate_device(dev, 0))
683 printk("VFS: busy inodes on changed media.\n");
684
685 if (bdops->revalidate)
686 bdops->revalidate(dev);
687 return 1;
688 }
689
690 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
691 {
692 int res;
693 mm_segment_t old_fs = get_fs();
694
695 if (!bdev->bd_op->ioctl)
696 return -EINVAL;
697 set_fs(KERNEL_DS);
698 res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg);
699 set_fs(old_fs);
700 return res;
701 }
702
703 int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
704 {
705 int ret = -ENODEV;
706 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
707 down(&bdev->bd_sem);
708
709 lock_kernel();
710 if (!bdev->bd_op)
711 bdev->bd_op = get_blkfops(MAJOR(rdev));
712 if (bdev->bd_op) {
713 /*
714 * This crockload is due to bad choice of ->open() type.
715 * It will go away.
716 * For now, block device ->open() routine must _not_
717 * examine anything in 'inode' argument except ->i_rdev.
718 */
719 struct file fake_file = {};
720 struct dentry fake_dentry = {};
721 ret = -ENOMEM;
722 fake_file.f_mode = mode;
723 fake_file.f_flags = flags;
724 fake_file.f_dentry = &fake_dentry;
725 fake_dentry.d_inode = bdev->bd_inode;
726 ret = 0;
727 if (bdev->bd_op->open)
728 ret = bdev->bd_op->open(bdev->bd_inode, &fake_file);
729 if (!ret) {
730 bdev->bd_openers++;
731 } else if (!bdev->bd_openers)
732 bdev->bd_op = NULL;
733 }
734 unlock_kernel();
735 up(&bdev->bd_sem);
736 if (ret)
737 bdput(bdev);
738 return ret;
739 }
740
741 int blkdev_open(struct inode * inode, struct file * filp)
742 {
743 int ret;
744 struct block_device *bdev;
745
746 /*
747 * Preserve backwards compatibility and allow large file access
748 * even if userspace doesn't ask for it explicitly. Some mkfs
749 * binary needs it. We might want to drop this workaround
750 * during an unstable branch.
751 */
752 filp->f_flags |= O_LARGEFILE;
753
754 bd_acquire(inode);
755 bdev = inode->i_bdev;
756 down(&bdev->bd_sem);
757
758 ret = -ENXIO;
759 lock_kernel();
760 if (!bdev->bd_op)
761 bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
762 if (bdev->bd_op) {
763 ret = 0;
764 if (bdev->bd_op->open)
765 ret = bdev->bd_op->open(inode,filp);
766 if (!ret)
767 bdev->bd_openers++;
768 else if (!bdev->bd_openers)
769 bdev->bd_op = NULL;
770 }
771 unlock_kernel();
772 up(&bdev->bd_sem);
773 if (ret)
774 bdput(bdev);
775 return ret;
776 }
777
778 int blkdev_put(struct block_device *bdev, int kind)
779 {
780 int ret = 0;
781 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
782 struct inode *bd_inode = bdev->bd_inode;
783
784 down(&bdev->bd_sem);
785 lock_kernel();
786 if (kind == BDEV_FILE) {
787 struct super_block * sb;
788
789 __block_fsync(bd_inode);
790
791 /* Janitorianism: this shit must go away */
792 sb = get_super(bd_inode->i_rdev);
793 if (sb) {
794 if (sb->s_flags & MS_RDONLY) {
795 shrink_dcache_sb(sb);
796 invalidate_inodes(sb);
797 invalidate_buffers(bd_inode->i_rdev);
798 }
799 lock_super(sb);
800 if (sb->s_flags & MS_RDONLY)
801 update_buffers(bd_inode->i_rdev);
802 unlock_super(sb);
803 drop_super(sb);
804 }
805 } else if (kind == BDEV_FS)
806 fsync_no_super(rdev);
807 if (!--bdev->bd_openers) {
808 truncate_inode_pages(bd_inode->i_mapping, 0);
809 invalidate_buffers(rdev);
810 }
811 if (bdev->bd_op->release)
812 ret = bdev->bd_op->release(bd_inode, NULL);
813 if (!bdev->bd_openers)
814 bdev->bd_op = NULL;
815 unlock_kernel();
816 up(&bdev->bd_sem);
817 bdput(bdev);
818 return ret;
819 }
820
821 int blkdev_close(struct inode * inode, struct file * filp)
822 {
823 return blkdev_put(inode->i_bdev, BDEV_FILE);
824 }
825
826 static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
827 unsigned long arg)
828 {
829 if (inode->i_bdev->bd_op->ioctl)
830 return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
831 return -EINVAL;
832 }
833
834 struct address_space_operations def_blk_aops = {
835 readpage: blkdev_readpage,
836 writepage: blkdev_writepage,
837 sync_page: block_sync_page,
838 prepare_write: blkdev_prepare_write,
839 commit_write: blkdev_commit_write,
840 direct_IO: blkdev_direct_IO,
841 };
842
843 struct file_operations def_blk_fops = {
844 open: blkdev_open,
845 release: blkdev_close,
846 llseek: block_llseek,
847 read: generic_file_read,
848 write: generic_file_write,
849 mmap: generic_file_mmap,
850 fsync: block_fsync,
851 ioctl: blkdev_ioctl,
852 };
853
854 const char * bdevname(kdev_t dev)
855 {
856 static char buffer[32];
857 const char * name = blkdevs[MAJOR(dev)].name;
858
859 if (!name)
860 name = "unknown-block";
861
862 sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
863 return buffer;
864 }
865