File: /usr/src/linux/drivers/i2o/i2o_block.c
1 /*
2 * I2O Random Block Storage Class OSM
3 *
4 * (C) Copyright 1999 Red Hat Software
5 *
6 * Written by Alan Cox, Building Number Three Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * This is a beta test release. Most of the good code was taken
14 * from the nbd driver by Pavel Machek, who in turn took some of it
15 * from loop.c. Isn't free software great for reusability 8)
16 *
17 * Fixes/additions:
18 * Steve Ralston:
19 * Multiple device handling error fixes,
20 * Added a queue depth.
21 * Alan Cox:
22 * FC920 has an rmw bug. Dont or in the end marker.
23 * Removed queue walk, fixed for 64bitness.
24 * Deepak Saxena:
25 * Independent queues per IOP
26 * Support for dynamic device creation/deletion
27 * Code cleanup
28 * Support for larger I/Os through merge* functions
29 * (taken from DAC960 driver)
30 * Boji T Kannanthanam:
31 * Set the I2O Block devices to be detected in increasing
32 * order of TIDs during boot.
33 * Search and set the I2O block device that we boot off from as
34 * the first device to be claimed (as /dev/i2o/hda)
35 * Properly attach/detach I2O gendisk structure from the system
36 * gendisk list. The I2O block devices now appear in
37 * /proc/partitions.
38 *
39 * To do:
40 * Serial number scanning to find duplicates for FC multipathing
41 */
42
43 #include <linux/major.h>
44
45 #include <linux/module.h>
46
47 #include <linux/sched.h>
48 #include <linux/fs.h>
49 #include <linux/stat.h>
50 #include <linux/errno.h>
51 #include <linux/file.h>
52 #include <linux/ioctl.h>
53 #include <linux/i2o.h>
54 #include <linux/blkdev.h>
55 #include <linux/blkpg.h>
56 #include <linux/slab.h>
57 #include <linux/hdreg.h>
58
59 #include <linux/notifier.h>
60 #include <linux/reboot.h>
61
62 #include <asm/uaccess.h>
63 #include <asm/semaphore.h>
64 #include <linux/completion.h>
65 #include <asm/io.h>
66 #include <asm/atomic.h>
67 #include <linux/smp_lock.h>
68 #include <linux/wait.h>
69
70 #define MAJOR_NR I2O_MAJOR
71
72 #include <linux/blk.h>
73
#define MAX_I2OB	16

#define MAX_I2OB_DEPTH	128
#define MAX_I2OB_RETRIES 4

/*
 * Debug output.
 *
 * Uncomment the DRIVERDEBUG define below to have DEBUG(s) hand its
 * argument to printk().  With DRIVERDEBUG left undefined DEBUG(s)
 * expands to an empty statement, so its argument is never evaluated
 * and nothing is logged.
 */
//#define DRIVERDEBUG
#ifdef DRIVERDEBUG
#define DEBUG( s ) printk( s )
#else
#define DEBUG( s ) do { } while (0)
#endif
85
/*
 * Event indications this OSM wants to hear about.  Each bit named here
 * has a matching case in the event thread's switch statement.
 */
#define I2OB_EVENT_MASK						\
	(							\
		I2O_EVT_IND_BSA_VOLUME_LOAD		|	\
		I2O_EVT_IND_BSA_VOLUME_UNLOAD		|	\
		I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ	|	\
		I2O_EVT_IND_BSA_CAPACITY_CHANGE		|	\
		I2O_EVT_IND_BSA_SCSI_SMART			\
	)
94
95
/*
 * I2O block storage detailed status codes.  These really belong in a
 * shared header.  The reply handler reads the code from the low 16 bits
 * of reply word 4.
 */
#define I2O_BSA_DSC_SUCCESS		0x0000
#define I2O_BSA_DSC_MEDIA_ERROR		0x0001
#define I2O_BSA_DSC_ACCESS_ERROR	0x0002
#define I2O_BSA_DSC_DEVICE_FAILURE	0x0003
#define I2O_BSA_DSC_DEVICE_NOT_READY	0x0004	/* triggers the flush/backlog path */
#define I2O_BSA_DSC_MEDIA_NOT_PRESENT	0x0005
#define I2O_BSA_DSC_MEDIA_LOCKED	0x0006
#define I2O_BSA_DSC_MEDIA_FAILURE	0x0007
#define I2O_BSA_DSC_PROTOCOL_FAILURE	0x0008
#define I2O_BSA_DSC_BUS_FAILURE		0x0009
#define I2O_BSA_DSC_ACCESS_VIOLATION	0x000A
#define I2O_BSA_DSC_WRITE_PROTECTED	0x000B
#define I2O_BSA_DSC_DEVICE_RESET	0x000C
#define I2O_BSA_DSC_VOLUME_CHANGED	0x000D
#define I2O_BSA_DSC_TIMEOUT		0x000E
114
115 /*
116 * Some of these can be made smaller later
117 */
118
119 static int i2ob_blksizes[MAX_I2OB<<4];
120 static int i2ob_hardsizes[MAX_I2OB<<4];
121 static int i2ob_sizes[MAX_I2OB<<4];
122 static int i2ob_media_change_flag[MAX_I2OB];
123 static u32 i2ob_max_sectors[MAX_I2OB<<4];
124
125 static int i2ob_context;
126
127 /*
128 * I2O Block device descriptor
129 */
130 struct i2ob_device
131 {
132 struct i2o_controller *controller;
133 struct i2o_device *i2odev;
134 int unit;
135 int tid;
136 int flags;
137 int refcnt;
138 struct request *head, *tail;
139 request_queue_t *req_queue;
140 int max_segments;
141 int done_flag;
142 int constipated;
143 int depth;
144 };
145
/*
 * Driver-side bookkeeping for one block request.
 *
 * FIXME: these should be cache aligned so that SMP machines under
 * heavy I/O load do not bounce cache lines between CPUs.
 */
struct i2ob_request
{
	struct i2ob_request *next;	/* link on the free list or the backlog list */
	struct request *req;		/* block layer request carried by this slot */
	int num;			/* written into message word 3 when sent */
};
157
158 /*
159 * Per IOP requst queue information
160 *
161 * We have a separate requeust_queue_t per IOP so that a heavilly
162 * loaded I2O block device on an IOP does not starve block devices
163 * across all I2O controllers.
164 *
165 */
166 struct i2ob_iop_queue
167 {
168 atomic_t queue_depth;
169 struct i2ob_request request_queue[MAX_I2OB_DEPTH];
170 struct i2ob_request *i2ob_qhead;
171 request_queue_t req_queue;
172 };
173 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
174 static struct i2ob_request *i2ob_backlog[MAX_I2O_CONTROLLERS];
175 static struct i2ob_request *i2ob_backlog_tail[MAX_I2O_CONTROLLERS];
176
177 /*
178 * Each I2O disk is one of these.
179 */
180
181 static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
182 static int i2ob_dev_count = 0;
183 static struct hd_struct i2ob[MAX_I2OB<<4];
184 static struct gendisk i2ob_gendisk; /* Declared later */
185
186 /*
187 * Mutex and spin lock for event handling synchronization
188 * evt_msg contains the last event.
189 */
190 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
191 static DECLARE_COMPLETION(i2ob_thread_dead);
192 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
193 static u32 evt_msg[MSG_FRAME_SIZE>>2];
194
195 static struct timer_list i2ob_timer;
196 static int i2ob_timer_started = 0;
197
198 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
199 struct i2o_message *);
200 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
201 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
202 static void i2ob_reboot_event(void);
203 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
204 static void i2ob_end_request(struct request *);
205 static void i2ob_request(request_queue_t *);
206 static int i2ob_backlog_request(struct i2o_controller *, struct i2ob_device *);
207 static int i2ob_init_iop(unsigned int);
208 static request_queue_t* i2ob_get_queue(kdev_t);
209 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
210 static int do_i2ob_revalidate(kdev_t, int);
211 static int i2ob_evt(void *);
212
213 static int evt_pid = 0;
214 static int evt_running = 0;
215 static int scan_unit = 0;
216
217 /*
218 * I2O OSM registration structure...keeps getting bigger and bigger :)
219 */
220 static struct i2o_handler i2o_block_handler =
221 {
222 i2o_block_reply,
223 i2ob_new_device,
224 i2ob_del_device,
225 i2ob_reboot_event,
226 "I2O Block OSM",
227 0,
228 I2O_CLASS_RANDOM_BLOCK_STORAGE
229 };
230
231 /*
232 * Get a message
233 */
234
235 static u32 i2ob_get(struct i2ob_device *dev)
236 {
237 struct i2o_controller *c=dev->controller;
238 return I2O_POST_READ32(c);
239 }
240
241 /*
242 * Turn a Linux block request into an I2O block read/write.
243 */
244
245 static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
246 {
247 struct i2o_controller *c = dev->controller;
248 int tid = dev->tid;
249 unsigned long msg;
250 unsigned long mptr;
251 u64 offset;
252 struct request *req = ireq->req;
253 struct buffer_head *bh = req->bh;
254 int count = req->nr_sectors<<9;
255 char *last = NULL;
256 unsigned short size = 0;
257
258 // printk(KERN_INFO "i2ob_send called\n");
259 /* Map the message to a virtual address */
260 msg = c->mem_offset + m;
261
262 /*
263 * Build the message based on the request.
264 */
265 __raw_writel(i2ob_context|(unit<<8), msg+8);
266 __raw_writel(ireq->num, msg+12);
267 __raw_writel(req->nr_sectors << 9, msg+20);
268
269 /*
270 * Mask out partitions from now on
271 */
272 unit &= 0xF0;
273
274 /* This can be optimised later - just want to be sure its right for
275 starters */
276 offset = ((u64)(req->sector+base)) << 9;
277 __raw_writel( offset & 0xFFFFFFFF, msg+24);
278 __raw_writel(offset>>32, msg+28);
279 mptr=msg+32;
280
281 if(req->cmd == READ)
282 {
283 __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
284 while(bh!=NULL)
285 {
286 if(bh->b_data == last) {
287 size += bh->b_size;
288 last += bh->b_size;
289 if(bh->b_reqnext)
290 __raw_writel(0x14000000|(size), mptr-8);
291 else
292 __raw_writel(0xD4000000|(size), mptr-8);
293 }
294 else
295 {
296 if(bh->b_reqnext)
297 __raw_writel(0x10000000|(bh->b_size), mptr);
298 else
299 __raw_writel(0xD0000000|(bh->b_size), mptr);
300 __raw_writel(virt_to_bus(bh->b_data), mptr+4);
301 mptr += 8;
302 size = bh->b_size;
303 last = bh->b_data + size;
304 }
305
306 count -= bh->b_size;
307 bh = bh->b_reqnext;
308 }
309 /*
310 * Heuristic for now since the block layer doesnt give
311 * us enough info. If its a big write assume sequential
312 * readahead on controller. If its small then don't read
313 * ahead but do use the controller cache.
314 */
315 if(size >= 8192)
316 __raw_writel((8<<24)|(1<<16)|8, msg+16);
317 else
318 __raw_writel((8<<24)|(1<<16)|4, msg+16);
319 }
320 else if(req->cmd == WRITE)
321 {
322 __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
323 while(bh!=NULL)
324 {
325 if(bh->b_data == last) {
326 size += bh->b_size;
327 last += bh->b_size;
328 if(bh->b_reqnext)
329 __raw_writel(0x14000000|(size), mptr-8);
330 else
331 __raw_writel(0xD4000000|(size), mptr-8);
332 }
333 else
334 {
335 if(bh->b_reqnext)
336 __raw_writel(0x14000000|(bh->b_size), mptr);
337 else
338 __raw_writel(0xD4000000|(bh->b_size), mptr);
339 __raw_writel(virt_to_bus(bh->b_data), mptr+4);
340 mptr += 8;
341 size = bh->b_size;
342 last = bh->b_data + size;
343 }
344
345 count -= bh->b_size;
346 bh = bh->b_reqnext;
347 }
348
349 if(c->battery)
350 {
351
352 if(size>16384)
353 __raw_writel(4, msg+16);
354 else
355 /*
356 * Allow replies to come back once data is cached in the controller
357 * This allows us to handle writes quickly thus giving more of the
358 * queue to reads.
359 */
360 __raw_writel(16, msg+16);
361 }
362 else
363 {
364 /* Large write, don't cache */
365 if(size>8192)
366 __raw_writel(4, msg+16);
367 else
368 /* write through */
369 __raw_writel(8, msg+16);
370 }
371 }
372 __raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
373
374 if(count != 0)
375 {
376 printk(KERN_ERR "Request count botched by %d.\n", count);
377 }
378
379 i2o_post_message(c,m);
380 atomic_inc(&i2ob_queues[c->unit]->queue_depth);
381
382 return 0;
383 }
384
385 /*
386 * Remove a request from the _locked_ request list. We update both the
387 * list chain and if this is the last item the tail pointer. Caller
388 * must hold the lock.
389 */
390
391 static inline void i2ob_unhook_request(struct i2ob_request *ireq,
392 unsigned int iop)
393 {
394 ireq->next = i2ob_queues[iop]->i2ob_qhead;
395 i2ob_queues[iop]->i2ob_qhead = ireq;
396 }
397
398 /*
399 * Request completion handler
400 */
401
402 static inline void i2ob_end_request(struct request *req)
403 {
404 /*
405 * Loop until all of the buffers that are linked
406 * to this request have been marked updated and
407 * unlocked.
408 */
409
410 while (end_that_request_first( req, !req->errors, "i2o block" ));
411
412 /*
413 * It is now ok to complete the request.
414 */
415 end_that_request_last( req );
416 }
417
418 /*
419 * Request merging functions
420 */
421 static inline int i2ob_new_segment(request_queue_t *q, struct request *req,
422 int __max_segments)
423 {
424 int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
425
426 if (__max_segments < max_segments)
427 max_segments = __max_segments;
428
429 if (req->nr_segments < max_segments) {
430 req->nr_segments++;
431 return 1;
432 }
433 return 0;
434 }
435
436 static int i2ob_back_merge(request_queue_t *q, struct request *req,
437 struct buffer_head *bh, int __max_segments)
438 {
439 if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
440 return 1;
441 return i2ob_new_segment(q, req, __max_segments);
442 }
443
444 static int i2ob_front_merge(request_queue_t *q, struct request *req,
445 struct buffer_head *bh, int __max_segments)
446 {
447 if (bh->b_data + bh->b_size == req->bh->b_data)
448 return 1;
449 return i2ob_new_segment(q, req, __max_segments);
450 }
451
452 static int i2ob_merge_requests(request_queue_t *q,
453 struct request *req,
454 struct request *next,
455 int __max_segments)
456 {
457 int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
458 int total_segments = req->nr_segments + next->nr_segments;
459
460 if (__max_segments < max_segments)
461 max_segments = __max_segments;
462
463 if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
464 total_segments--;
465
466 if (total_segments > max_segments)
467 return 0;
468
469 req->nr_segments = total_segments;
470 return 1;
471 }
472
473 static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit)
474 {
475 unsigned long msg;
476 u32 m = i2ob_get(d);
477
478 if(m == 0xFFFFFFFF)
479 return -1;
480
481 msg = c->mem_offset + m;
482
483 /*
484 * Ask the controller to write the cache back. This sorts out
485 * the supertrak firmware flaw and also does roughly the right
486 * thing for other cases too.
487 */
488
489 __raw_writel(FIVE_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
490 __raw_writel(I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|d->tid, msg+4);
491 __raw_writel(i2ob_context|(unit<<8), msg+8);
492 __raw_writel(0, msg+12);
493 __raw_writel(60<<16, msg+16);
494
495 i2o_post_message(c,m);
496 return 0;
497 }
498
499 /*
500 * OSM reply handler. This gets all the message replies
501 */
502
503 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
504 {
505 unsigned long flags;
506 struct i2ob_request *ireq = NULL;
507 u8 st;
508 u32 *m = (u32 *)msg;
509 u8 unit = (m[2]>>8)&0xF0; /* low 4 bits are partition */
510 struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
511
512 /*
513 * FAILed message
514 */
515 if(m[0] & (1<<13))
516 {
517 /*
518 * FAILed message from controller
519 * We increment the error count and abort it
520 *
521 * In theory this will never happen. The I2O block class
522 * speficiation states that block devices never return
523 * FAILs but instead use the REQ status field...but
524 * better be on the safe side since no one really follows
525 * the spec to the book :)
526 */
527 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
528 ireq->req->errors++;
529
530 spin_lock_irqsave(&io_request_lock, flags);
531 i2ob_unhook_request(ireq, c->unit);
532 i2ob_end_request(ireq->req);
533 spin_unlock_irqrestore(&io_request_lock, flags);
534
535 /* Now flush the message by making it a NOP */
536 m[0]&=0x00FFFFFF;
537 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
538 i2o_post_message(c,virt_to_bus(m));
539
540 return;
541 }
542
543 if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
544 {
545 spin_lock(&i2ob_evt_lock);
546 memcpy(evt_msg, msg, (m[0]>>16)<<2);
547 spin_unlock(&i2ob_evt_lock);
548 up(&i2ob_evt_sem);
549 return;
550 }
551
552 if(msg->function == I2O_CMD_BLOCK_CFLUSH)
553 {
554 spin_lock_irqsave(&io_request_lock, flags);
555 dev->constipated=0;
556 DEBUG(("unconstipated\n"));
557 if(i2ob_backlog_request(c, dev)==0)
558 i2ob_request(dev->req_queue);
559 spin_unlock_irqrestore(&io_request_lock, flags);
560 return;
561 }
562
563 if(!dev->i2odev)
564 {
565 /*
566 * This is HACK, but Intel Integrated RAID allows user
567 * to delete a volume that is claimed, locked, and in use
568 * by the OS. We have to check for a reply from a
569 * non-existent device and flag it as an error or the system
570 * goes kaput...
571 */
572 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
573 ireq->req->errors++;
574 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
575 spin_lock_irqsave(&io_request_lock, flags);
576 i2ob_unhook_request(ireq, c->unit);
577 i2ob_end_request(ireq->req);
578 spin_unlock_irqrestore(&io_request_lock, flags);
579 return;
580 }
581
582 /*
583 * Lets see what is cooking. We stuffed the
584 * request in the context.
585 */
586
587 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
588 st=m[4]>>24;
589
590 if(st!=0)
591 {
592 int err;
593 char *bsa_errors[] =
594 {
595 "Success",
596 "Media Error",
597 "Failure communicating to device",
598 "Device Failure",
599 "Device is not ready",
600 "Media not present",
601 "Media is locked by another user",
602 "Media has failed",
603 "Failure communicating to device",
604 "Device bus failure",
605 "Device is locked by another user",
606 "Device is write protected",
607 "Device has reset",
608 "Volume has changed, waiting for acknowledgement"
609 };
610
611 err = m[4]&0xFFFF;
612
613 /*
614 * Device not ready means two things. One is that the
615 * the thing went offline (but not a removal media)
616 *
617 * The second is that you have a SuperTrak 100 and the
618 * firmware got constipated. Unlike standard i2o card
619 * setups the supertrak returns an error rather than
620 * blocking for the timeout in these cases.
621 */
622
623
624 spin_lock_irqsave(&io_request_lock, flags);
625 if(err==4)
626 {
627 /*
628 * Time to uncork stuff
629 */
630
631 if(!dev->constipated)
632 {
633 dev->constipated = 1;
634 DEBUG(("constipated\n"));
635 /* Now pull the chain */
636 if(i2ob_flush(c, dev, unit)<0)
637 {
638 DEBUG(("i2ob: Unable to queue flush. Retrying I/O immediately.\n"));
639 dev->constipated=0;
640 }
641 DEBUG(("flushing\n"));
642 }
643
644 /*
645 * Recycle the request
646 */
647
648 // i2ob_unhook_request(ireq, c->unit);
649
650 /*
651 * Place it on the recycle queue
652 */
653
654 ireq->next = NULL;
655 if(i2ob_backlog_tail[c->unit]!=NULL)
656 i2ob_backlog_tail[c->unit]->next = ireq;
657 else
658 i2ob_backlog[c->unit] = ireq;
659 i2ob_backlog_tail[c->unit] = ireq;
660
661 atomic_dec(&i2ob_queues[c->unit]->queue_depth);
662
663 /*
664 * If the constipator flush failed we want to
665 * poke the queue again.
666 */
667
668 i2ob_request(dev->req_queue);
669 spin_unlock_irqrestore(&io_request_lock, flags);
670
671 /*
672 * and out
673 */
674
675 return;
676 }
677 spin_unlock_irqrestore(&io_request_lock, flags);
678 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
679 bsa_errors[m[4]&0XFFFF]);
680 if(m[4]&0x00FF0000)
681 printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
682 printk(".\n");
683 ireq->req->errors++;
684 }
685 else
686 ireq->req->errors = 0;
687
688 /*
689 * Dequeue the request. We use irqsave locks as one day we
690 * may be running polled controllers from a BH...
691 */
692
693 spin_lock_irqsave(&io_request_lock, flags);
694 i2ob_unhook_request(ireq, c->unit);
695 i2ob_end_request(ireq->req);
696 atomic_dec(&i2ob_queues[c->unit]->queue_depth);
697
698 /*
699 * We may be able to do more I/O
700 */
701
702 if(i2ob_backlog_request(c, dev)==0)
703 i2ob_request(dev->req_queue);
704
705 spin_unlock_irqrestore(&io_request_lock, flags);
706 }
707
708 /*
709 * Event handler. Needs to be a separate thread b/c we may have
710 * to do things like scan a partition table, or query parameters
711 * which cannot be done from an interrupt or from a bottom half.
712 */
713 static int i2ob_evt(void *dummy)
714 {
715 unsigned int evt;
716 unsigned long flags;
717 int unit;
718 int i;
719 //The only event that has data is the SCSI_SMART event.
720 struct i2o_reply {
721 u32 header[4];
722 u32 evt_indicator;
723 u8 ASC;
724 u8 ASCQ;
725 u8 data[16];
726 } *evt_local;
727
728 lock_kernel();
729 daemonize();
730 unlock_kernel();
731
732 strcpy(current->comm, "i2oblock");
733 evt_running = 1;
734
735 while(1)
736 {
737 if(down_interruptible(&i2ob_evt_sem))
738 {
739 evt_running = 0;
740 printk("exiting...");
741 break;
742 }
743
744 /*
745 * Keep another CPU/interrupt from overwriting the
746 * message while we're reading it
747 *
748 * We stuffed the unit in the TxContext and grab the event mask
749 * None of the BSA we care about events have EventData
750 */
751 spin_lock_irqsave(&i2ob_evt_lock, flags);
752 evt_local = (struct i2o_reply *)evt_msg;
753 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
754
755 unit = evt_local->header[3];
756 evt = evt_local->evt_indicator;
757
758 switch(evt)
759 {
760 /*
761 * New volume loaded on same TID, so we just re-install.
762 * The TID/controller don't change as it is the same
763 * I2O device. It's just new media that we have to
764 * rescan.
765 */
766 case I2O_EVT_IND_BSA_VOLUME_LOAD:
767 {
768 i2ob_install_device(i2ob_dev[unit].i2odev->controller,
769 i2ob_dev[unit].i2odev, unit);
770 break;
771 }
772
773 /*
774 * No media, so set all parameters to 0 and set the media
775 * change flag. The I2O device is still valid, just doesn't
776 * have media, so we don't want to clear the controller or
777 * device pointer.
778 */
779 case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
780 {
781 for(i = unit; i <= unit+15; i++)
782 {
783 i2ob_sizes[i] = 0;
784 i2ob_hardsizes[i] = 0;
785 i2ob_max_sectors[i] = 0;
786 i2ob[i].nr_sects = 0;
787 i2ob_gendisk.part[i].nr_sects = 0;
788 }
789 i2ob_media_change_flag[unit] = 1;
790 break;
791 }
792
793 case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
794 printk(KERN_WARNING "%s: Attempt to eject locked media\n",
795 i2ob_dev[unit].i2odev->dev_name);
796 break;
797
798 /*
799 * The capacity has changed and we are going to be
800 * updating the max_sectors and other information
801 * about this disk. We try a revalidate first. If
802 * the block device is in use, we don't want to
803 * do that as there may be I/Os bound for the disk
804 * at the moment. In that case we read the size
805 * from the device and update the information ourselves
806 * and the user can later force a partition table
807 * update through an ioctl.
808 */
809 case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
810 {
811 u64 size;
812
813 if(do_i2ob_revalidate(MKDEV(MAJOR_NR, unit),0) != -EBUSY)
814 continue;
815
816 if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
817 i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
818
819 spin_lock_irqsave(&io_request_lock, flags);
820 i2ob_sizes[unit] = (int)(size>>10);
821 i2ob_gendisk.part[unit].nr_sects = size>>9;
822 i2ob[unit].nr_sects = (int)(size>>9);
823 spin_unlock_irqrestore(&io_request_lock, flags);
824 break;
825 }
826
827 /*
828 * We got a SCSI SMART event, we just log the relevant
829 * information and let the user decide what they want
830 * to do with the information.
831 */
832 case I2O_EVT_IND_BSA_SCSI_SMART:
833 {
834 char buf[16];
835 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
836 evt_local->data[16]='\0';
837 sprintf(buf,"%s",&evt_local->data[0]);
838 printk(KERN_INFO " Disk Serial#:%s\n",buf);
839 printk(KERN_INFO " ASC 0x%02x \n",evt_local->ASC);
840 printk(KERN_INFO " ASCQ 0x%02x \n",evt_local->ASCQ);
841 break;
842 }
843
844 /*
845 * Non event
846 */
847
848 case 0:
849 break;
850
851 /*
852 * An event we didn't ask for. Call the card manufacturer
853 * and tell them to fix their firmware :)
854 */
855 default:
856 printk(KERN_INFO "%s: Received event %d we didn't register for\n"
857 KERN_INFO " Blame the I2O card manufacturer 8)\n",
858 i2ob_dev[unit].i2odev->dev_name, evt);
859 break;
860 }
861 };
862
863 complete_and_exit(&i2ob_thread_dead,0);
864 return 0;
865 }
866
867 /*
868 * The timer handler will attempt to restart requests
869 * that are queued to the driver. This handler
870 * currently only gets called if the controller
871 * had no more room in its inbound fifo.
872 */
873
874 static void i2ob_timer_handler(unsigned long q)
875 {
876 unsigned long flags;
877
878 /*
879 * We cannot touch the request queue or the timer
880 * flag without holding the io_request_lock.
881 */
882 spin_lock_irqsave(&io_request_lock,flags);
883
884 /*
885 * Clear the timer started flag so that
886 * the timer can be queued again.
887 */
888 i2ob_timer_started = 0;
889
890 /*
891 * Restart any requests.
892 */
893 i2ob_request((request_queue_t*)q);
894
895 /*
896 * Free the lock.
897 */
898 spin_unlock_irqrestore(&io_request_lock,flags);
899 }
900
901 static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev)
902 {
903 u32 m;
904 struct i2ob_request *ireq;
905
906 while((ireq=i2ob_backlog[c->unit])!=NULL)
907 {
908 int unit;
909
910 if(atomic_read(&i2ob_queues[c->unit]->queue_depth) > dev->depth/4)
911 break;
912
913 m = i2ob_get(dev);
914 if(m == 0xFFFFFFFF)
915 break;
916
917 i2ob_backlog[c->unit] = ireq->next;
918 if(i2ob_backlog[c->unit] == NULL)
919 i2ob_backlog_tail[c->unit] = NULL;
920
921 unit = MINOR(ireq->req->rq_dev);
922 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, unit);
923 }
924 if(i2ob_backlog[c->unit])
925 return 1;
926 return 0;
927 }
928
929 /*
930 * The I2O block driver is listed as one of those that pulls the
931 * front entry off the queue before processing it. This is important
932 * to remember here. If we drop the io lock then CURRENT will change
933 * on us. We must unlink CURRENT in this routine before we return, if
934 * we use it.
935 */
936
937 static void i2ob_request(request_queue_t *q)
938 {
939 struct request *req;
940 struct i2ob_request *ireq;
941 int unit;
942 struct i2ob_device *dev;
943 u32 m;
944
945
946 while (!list_empty(&q->queue_head)) {
947 /*
948 * On an IRQ completion if there is an inactive
949 * request on the queue head it means it isnt yet
950 * ready to dispatch.
951 */
952 req = blkdev_entry_next_request(&q->queue_head);
953
954 if(req->rq_status == RQ_INACTIVE)
955 return;
956
957 unit = MINOR(req->rq_dev);
958 dev = &i2ob_dev[(unit&0xF0)];
959
960 /*
961 * Queue depths probably belong with some kind of
962 * generic IOP commit control. Certainly its not right
963 * its global!
964 */
965 if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
966 break;
967
968 /*
969 * Is the channel constipated ?
970 */
971
972 if(i2ob_backlog[dev->unit]!=NULL)
973 break;
974
975 /* Get a message */
976 m = i2ob_get(dev);
977
978 if(m==0xFFFFFFFF)
979 {
980 /*
981 * See if the timer has already been queued.
982 */
983 if (!i2ob_timer_started)
984 {
985 DEBUG((KERN_ERR "i2ob: starting timer\n"));
986
987 /*
988 * Set the timer_started flag to insure
989 * that the timer is only queued once.
990 * Queing it more than once will corrupt
991 * the timer queue.
992 */
993 i2ob_timer_started = 1;
994
995 /*
996 * Set up the timer to expire in
997 * 500ms.
998 */
999 i2ob_timer.expires = jiffies + (HZ >> 1);
1000 i2ob_timer.data = (unsigned int)q;
1001
1002 /*
1003 * Start it.
1004 */
1005
1006 add_timer(&i2ob_timer);
1007 return;
1008 }
1009 }
1010
1011 /*
1012 * Everything ok, so pull from kernel queue onto our queue
1013 */
1014 req->errors = 0;
1015 blkdev_dequeue_request(req);
1016 req->waiting = NULL;
1017
1018 ireq = i2ob_queues[dev->unit]->i2ob_qhead;
1019 i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
1020 ireq->req = req;
1021
1022 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0));
1023 }
1024 }
1025
1026
/*
 * SCSI-CAM for ioctl geometry mapping
 * Duplicated with SCSI - this should be moved into somewhere common
 * perhaps genhd ?
 *
 * LBA -> CHS mapping table taken from:
 *
 * "Incorporating the I2O Architecture into BIOS for Intel Architecture
 *  Platforms"
 *
 * This is an I2O document that is only available to I2O members,
 * not developers.
 *
 * From my understanding, this is how all the I2O cards do this.
 * With X the capacity passed in:
 *
 * Capacity X                          | Sectors | Heads | Cylinders
 * ------------------------------------+---------+-------+---------------
 * X <= BLOCK_SIZE_528M                |   63    |   16  | X / (63 * 16)
 * BLOCK_SIZE_528M < X <= BLOCK_SIZE_1G  |   63    |   32  | X / (63 * 32)
 * BLOCK_SIZE_1G   < X <= BLOCK_SIZE_21G |   63    |   64  | X / (63 * 64)
 * BLOCK_SIZE_21G  < X <= BLOCK_SIZE_42G |   63    |  128  | X / (63 * 128)
 * X > BLOCK_SIZE_42G                  |   63    |  255  | X / (63 * 255)
 *
 */
#define	BLOCK_SIZE_528M		1081344
#define	BLOCK_SIZE_1G		2097152
#define	BLOCK_SIZE_21G		4403200
#define	BLOCK_SIZE_42G		8806400
#define	BLOCK_SIZE_84G		17612800

/*
 * Compute a geometry for a disk of @capacity.
 *
 * The sector count is always 63.  The head count is picked from the
 * table above, and the cylinder count is capacity / (heads * sectors),
 * truncated to an unsigned short.  Results are stored through @cyls,
 * @hds and @secs.
 */
static void i2o_block_biosparam(
	unsigned long capacity,
	unsigned short *cyls,
	unsigned char *hds,
	unsigned char *secs)
{
	static const struct {
		unsigned long max_capacity;
		unsigned long heads;
	} geometry[] = {
		{ BLOCK_SIZE_528M,  16 },
		{ BLOCK_SIZE_1G,    32 },
		{ BLOCK_SIZE_21G,   64 },
		{ BLOCK_SIZE_42G,  128 },
	};
	const unsigned long sectors = 63L;	/* Maximize sectors per track */
	unsigned long heads = 255;		/* anything above the last limit */
	unsigned int i;

	for (i = 0; i < sizeof(geometry) / sizeof(geometry[0]); i++) {
		if (capacity <= geometry[i].max_capacity) {
			heads = geometry[i].heads;
			break;
		}
	}

	/* Stuff return values */
	*cyls = (unsigned short)(capacity / (heads * sectors));
	*secs = (unsigned char)sectors;
	*hds  = (unsigned char)heads;
}
1082
1083
1084 /*
1085 * Rescan the partition tables
1086 */
1087
1088 static int do_i2ob_revalidate(kdev_t dev, int maxu)
1089 {
1090 int minor=MINOR(dev);
1091 int i;
1092
1093 minor&=0xF0;
1094
1095 i2ob_dev[minor].refcnt++;
1096 if(i2ob_dev[minor].refcnt>maxu+1)
1097 {
1098 i2ob_dev[minor].refcnt--;
1099 return -EBUSY;
1100 }
1101
1102 for( i = 15; i>=0 ; i--)
1103 {
1104 int m = minor+i;
1105 invalidate_device(MKDEV(MAJOR_NR, m), 1);
1106 i2ob_gendisk.part[m].start_sect = 0;
1107 i2ob_gendisk.part[m].nr_sects = 0;
1108 }
1109
1110 /*
1111 * Do a physical check and then reconfigure
1112 */
1113
1114 i2ob_install_device(i2ob_dev[minor].controller, i2ob_dev[minor].i2odev,
1115 minor);
1116 i2ob_dev[minor].refcnt--;
1117 return 0;
1118 }
1119
1120 /*
1121 * Issue device specific ioctl calls.
1122 */
1123
1124 static int i2ob_ioctl(struct inode *inode, struct file *file,
1125 unsigned int cmd, unsigned long arg)
1126 {
1127 struct i2ob_device *dev;
1128 int minor;
1129
1130 /* Anyone capable of this syscall can do *real bad* things */
1131
1132 if (!capable(CAP_SYS_ADMIN))
1133 return -EPERM;
1134 if (!inode)
1135 return -EINVAL;
1136 minor = MINOR(inode->i_rdev);
1137 if (minor >= (MAX_I2OB<<4))
1138 return -ENODEV;
1139
1140 dev = &i2ob_dev[minor];
1141 switch (cmd) {
1142 case BLKGETSIZE:
1143 return put_user(i2ob[minor].nr_sects, (long *) arg);
1144 case BLKGETSIZE64:
1145 return put_user((u64)i2ob[minor].nr_sects << 9, (u64 *)arg);
1146
1147 case HDIO_GETGEO:
1148 {
1149 struct hd_geometry g;
1150 int u=minor&0xF0;
1151 i2o_block_biosparam(i2ob_sizes[u]<<1,
1152 &g.cylinders, &g.heads, &g.sectors);
1153 g.start = i2ob[minor].start_sect;
1154 return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
1155 }
1156
1157 case BLKRRPART:
1158 if(!capable(CAP_SYS_ADMIN))
1159 return -EACCES;
1160 return do_i2ob_revalidate(inode->i_rdev,1);
1161
1162 case BLKFLSBUF:
1163 case BLKROSET:
1164 case BLKROGET:
1165 case BLKRASET:
1166 case BLKRAGET:
1167 case BLKPG:
1168 return blk_ioctl(inode->i_rdev, cmd, arg);
1169
1170 default:
1171 return -EINVAL;
1172 }
1173 }
1174
1175 /*
1176 * Close the block device down
1177 */
1178
1179 static int i2ob_release(struct inode *inode, struct file *file)
1180 {
1181 struct i2ob_device *dev;
1182 int minor;
1183
1184 minor = MINOR(inode->i_rdev);
1185 if (minor >= (MAX_I2OB<<4))
1186 return -ENODEV;
1187 dev = &i2ob_dev[(minor&0xF0)];
1188
1189 /*
1190 * This is to deail with the case of an application
1191 * opening a device and then the device dissapears while
1192 * it's in use, and then the application tries to release
1193 * it. ex: Unmounting a deleted RAID volume at reboot.
1194 * If we send messages, it will just cause FAILs since
1195 * the TID no longer exists.
1196 */
1197 if(!dev->i2odev)
1198 return 0;
1199
1200 if (dev->refcnt <= 0)
1201 printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
1202 dev->refcnt--;
1203 if(dev->refcnt==0)
1204 {
1205 /*
1206 * Flush the onboard cache on unmount
1207 */
1208 u32 msg[5];
1209 int *query_done = &dev->done_flag;
1210 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1211 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1212 msg[2] = i2ob_context|0x40000000;
1213 msg[3] = (u32)query_done;
1214 msg[4] = 60<<16;
1215 DEBUG("Flushing...");
1216 i2o_post_wait(dev->controller, msg, 20, 60);
1217
1218 /*
1219 * Unlock the media
1220 */
1221 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1222 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1223 msg[2] = i2ob_context|0x40000000;
1224 msg[3] = (u32)query_done;
1225 msg[4] = -1;
1226 DEBUG("Unlocking...");
1227 i2o_post_wait(dev->controller, msg, 20, 2);
1228 DEBUG("Unlocked.\n");
1229
1230 /*
1231 * Now unclaim the device.
1232 */
1233
1234 if (i2o_release_device(dev->i2odev, &i2o_block_handler))
1235 printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
1236
1237 DEBUG("Unclaim\n");
1238 }
1239 MOD_DEC_USE_COUNT;
1240 return 0;
1241 }
1242
1243 /*
1244 * Open the block device.
1245 */
1246
1247 static int i2ob_open(struct inode *inode, struct file *file)
1248 {
1249 int minor;
1250 struct i2ob_device *dev;
1251
1252 if (!inode)
1253 return -EINVAL;
1254 minor = MINOR(inode->i_rdev);
1255 if (minor >= MAX_I2OB<<4)
1256 return -ENODEV;
1257 dev=&i2ob_dev[(minor&0xF0)];
1258
1259 if(!dev->i2odev)
1260 return -ENODEV;
1261
1262 if(dev->refcnt++==0)
1263 {
1264 u32 msg[6];
1265
1266 DEBUG("Claim ");
1267 if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
1268 {
1269 dev->refcnt--;
1270 printk(KERN_INFO "I2O Block: Could not open device\n");
1271 return -EBUSY;
1272 }
1273 DEBUG("Claimed ");
1274
1275 /*
1276 * Mount the media if needed. Note that we don't use
1277 * the lock bit. Since we have to issue a lock if it
1278 * refuses a mount (quite possible) then we might as
1279 * well just send two messages out.
1280 */
1281 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1282 msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
1283 msg[4] = -1;
1284 msg[5] = 0;
1285 DEBUG("Mount ");
1286 i2o_post_wait(dev->controller, msg, 24, 2);
1287
1288 /*
1289 * Lock the media
1290 */
1291 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1292 msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
1293 msg[4] = -1;
1294 DEBUG("Lock ");
1295 i2o_post_wait(dev->controller, msg, 20, 2);
1296 DEBUG("Ready.\n");
1297 }
1298 MOD_INC_USE_COUNT;
1299 return 0;
1300 }
1301
1302 /*
1303 * Issue a device query
1304 */
1305
1306 static int i2ob_query_device(struct i2ob_device *dev, int table,
1307 int field, void *buf, int buflen)
1308 {
1309 return i2o_query_scalar(dev->controller, dev->tid,
1310 table, field, buf, buflen);
1311 }
1312
1313
1314 /*
1315 * Install the I2O block device we found.
1316 */
1317
1318 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1319 {
1320 u64 size;
1321 u32 blocksize;
1322 u32 limit;
1323 u8 type;
1324 u32 flags, status;
1325 struct i2ob_device *dev=&i2ob_dev[unit];
1326 int i;
1327
1328 /*
1329 * For logging purposes...
1330 */
1331 printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
1332 d->lct_data.tid, unit);
1333
1334 /*
1335 * Ask for the current media data. If that isn't supported
1336 * then we ask for the device capacity data
1337 */
1338 if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1339 || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1340 {
1341 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1342 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1343 }
1344
1345 i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1346 i2ob_query_device(dev, 0x0000, 6, &status, 4);
1347 i2ob_sizes[unit] = (int)(size>>10);
1348 for(i=unit; i <= unit+15 ; i++)
1349 i2ob_hardsizes[i] = blocksize;
1350 i2ob_gendisk.part[unit].nr_sects = size>>9;
1351 i2ob[unit].nr_sects = (int)(size>>9);
1352
1353 /* Set limit based on inbound frame size */
1354 limit = (d->controller->status_block->inbound_frame_size - 8)/2;
1355 limit = limit<<9;
1356
1357 /*
1358 * Max number of Scatter-Gather Elements
1359 */
1360
1361 for(i=unit;i<=unit+15;i++)
1362 {
1363 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy)
1364 {
1365 i2ob_max_sectors[i] = 32;
1366 i2ob_dev[i].max_segments = 8;
1367 i2ob_dev[i].depth = 4;
1368 }
1369 else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req)
1370 {
1371 i2ob_max_sectors[i] = 8;
1372 i2ob_dev[i].max_segments = 8;
1373 }
1374 else
1375 {
1376 /* MAX_SECTORS was used but 255 is a dumb number for
1377 striped RAID */
1378 i2ob_max_sectors[i]=256;
1379 i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2;
1380 }
1381 }
1382
1383 printk(KERN_INFO "Max segments set to %d\n",
1384 i2ob_dev[unit].max_segments);
1385 printk(KERN_INFO "Byte limit is %d.\n", limit);
1386
1387 i2ob_query_device(dev, 0x0000, 0, &type, 1);
1388
1389 sprintf(d->dev_name, "%s%c", i2ob_gendisk.major_name, 'a' + (unit>>4));
1390
1391 printk(KERN_INFO "%s: ", d->dev_name);
1392 switch(type)
1393 {
1394 case 0: printk("Disk Storage");break;
1395 case 4: printk("WORM");break;
1396 case 5: printk("CD-ROM");break;
1397 case 7: printk("Optical device");break;
1398 default:
1399 printk("Type %d", type);
1400 }
1401 if(status&(1<<10))
1402 printk("(RAID)");
1403 if(((flags & (1<<3)) && !(status & (1<<3))) ||
1404 ((flags & (1<<4)) && !(status & (1<<4))))
1405 {
1406 printk(KERN_INFO " Not loaded.\n");
1407 return 1;
1408 }
1409 printk("- %dMb, %d byte sectors",
1410 (int)(size>>20), blocksize);
1411 if(status&(1<<0))
1412 {
1413 u32 cachesize;
1414 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1415 cachesize>>=10;
1416 if(cachesize>4095)
1417 printk(", %dMb cache", cachesize>>10);
1418 else
1419 printk(", %dKb cache", cachesize);
1420
1421 }
1422 printk(".\n");
1423 printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
1424 d->dev_name, i2ob_max_sectors[unit]);
1425
1426 /*
1427 * If this is the first I2O block device found on this IOP,
1428 * we need to initialize all the queue data structures
1429 * before any I/O can be performed. If it fails, this
1430 * device is useless.
1431 */
1432 if(!i2ob_queues[c->unit]) {
1433 if(i2ob_init_iop(c->unit))
1434 return 1;
1435 }
1436
1437 /*
1438 * This will save one level of lookup/indirection in critical
1439 * code so that we can directly get the queue ptr from the
1440 * device instead of having to go the IOP data structure.
1441 */
1442 dev->req_queue = &i2ob_queues[c->unit]->req_queue;
1443
1444 grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9));
1445
1446 /*
1447 * Register for the events we're interested in and that the
1448 * device actually supports.
1449 */
1450 i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
1451 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1452
1453 return 0;
1454 }
1455
1456 /*
1457 * Initialize IOP specific queue structures. This is called
1458 * once for each IOP that has a block device sitting behind it.
1459 */
1460 static int i2ob_init_iop(unsigned int unit)
1461 {
1462 int i;
1463
1464 i2ob_queues[unit] = (struct i2ob_iop_queue*)
1465 kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1466 if(!i2ob_queues[unit])
1467 {
1468 printk(KERN_WARNING
1469 "Could not allocate request queue for I2O block device!\n");
1470 return -1;
1471 }
1472
1473 for(i = 0; i< MAX_I2OB_DEPTH; i++)
1474 {
1475 i2ob_queues[unit]->request_queue[i].next =
1476 &i2ob_queues[unit]->request_queue[i+1];
1477 i2ob_queues[unit]->request_queue[i].num = i;
1478 }
1479
1480 /* Queue is MAX_I2OB + 1... */
1481 i2ob_queues[unit]->request_queue[i].next = NULL;
1482 i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1483 atomic_set(&i2ob_queues[unit]->queue_depth, 0);
1484
1485 blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request);
1486 blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0);
1487 i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge;
1488 i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge;
1489 i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests;
1490 i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit];
1491
1492 return 0;
1493 }
1494
1495 /*
1496 * Get the request queue for the given device.
1497 */
1498 static request_queue_t* i2ob_get_queue(kdev_t dev)
1499 {
1500 int unit = MINOR(dev)&0xF0;
1501
1502 return i2ob_dev[unit].req_queue;
1503 }
1504
1505 /*
1506 * Probe the I2O subsytem for block class devices
1507 */
1508 static void i2ob_scan(int bios)
1509 {
1510 int i;
1511 int warned = 0;
1512
1513 struct i2o_device *d, *b=NULL;
1514 struct i2o_controller *c;
1515 struct i2ob_device *dev;
1516
1517 for(i=0; i< MAX_I2O_CONTROLLERS; i++)
1518 {
1519 c=i2o_find_controller(i);
1520
1521 if(c==NULL)
1522 continue;
1523
1524 /*
1525 * The device list connected to the I2O Controller is doubly linked
1526 * Here we traverse the end of the list , and start claiming devices
1527 * from that end. This assures that within an I2O controller atleast
1528 * the newly created volumes get claimed after the older ones, thus
1529 * mapping to same major/minor (and hence device file name) after
1530 * every reboot.
1531 * The exception being:
1532 * 1. If there was a TID reuse.
1533 * 2. There was more than one I2O controller.
1534 */
1535
1536 if(!bios)
1537 {
1538 for (d=c->devices;d!=NULL;d=d->next)
1539 if(d->next == NULL)
1540 b = d;
1541 }
1542 else
1543 b = c->devices;
1544
1545 while(b != NULL)
1546 {
1547 d=b;
1548 if(bios)
1549 b = b->next;
1550 else
1551 b = b->prev;
1552
1553 if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
1554 continue;
1555
1556 if(d->lct_data.user_tid != 0xFFF)
1557 continue;
1558
1559 if(bios)
1560 {
1561 if(d->lct_data.bios_info != 0x80)
1562 continue;
1563 printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
1564 }
1565 else
1566 {
1567 if(d->lct_data.bios_info == 0x80)
1568 continue; /*Already claimed on pass 1 */
1569 }
1570
1571 if(i2o_claim_device(d, &i2o_block_handler))
1572 {
1573 printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
1574 d->lct_data.tid);
1575 printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
1576 continue;
1577 }
1578
1579 if(scan_unit<MAX_I2OB<<4)
1580 {
1581 /*
1582 * Get the device and fill in the
1583 * Tid and controller.
1584 */
1585 dev=&i2ob_dev[scan_unit];
1586 dev->i2odev = d;
1587 dev->controller = c;
1588 dev->unit = c->unit;
1589 dev->tid = d->lct_data.tid;
1590
1591 if(i2ob_install_device(c,d,scan_unit))
1592 printk(KERN_WARNING "Could not install I2O block device\n");
1593 else
1594 {
1595 scan_unit+=16;
1596 i2ob_dev_count++;
1597
1598 /* We want to know when device goes away */
1599 i2o_device_notify_on(d, &i2o_block_handler);
1600 }
1601 }
1602 else
1603 {
1604 if(!warned++)
1605 printk(KERN_WARNING "i2o_block: too many device, registering only %d.\n", scan_unit>>4);
1606 }
1607 i2o_release_device(d, &i2o_block_handler);
1608 }
1609 i2o_unlock_controller(c);
1610 }
1611 }
1612
1613 static void i2ob_probe(void)
1614 {
1615 /*
1616 * Some overhead/redundancy involved here, while trying to
1617 * claim the first boot volume encountered as /dev/i2o/hda
1618 * everytime. All the i2o_controllers are searched and the
1619 * first i2o block device marked as bootable is claimed
1620 * If an I2O block device was booted off , the bios sets
1621 * its bios_info field to 0x80, this what we search for.
1622 * Assuming that the bootable volume is /dev/i2o/hda
1623 * everytime will prevent any kernel panic while mounting
1624 * root partition
1625 */
1626
1627 printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
1628 i2ob_scan(1);
1629
1630 /*
1631 * Now the remainder.
1632 */
1633 printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
1634 i2ob_scan(0);
1635 }
1636
1637
1638 /*
1639 * New device notification handler. Called whenever a new
1640 * I2O block storage device is added to the system.
1641 *
1642 * Should we spin lock around this to keep multiple devs from
1643 * getting updated at the same time?
1644 *
1645 */
1646 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1647 {
1648 struct i2ob_device *dev;
1649 int unit = 0;
1650
1651 printk(KERN_INFO "i2o_block: New device detected\n");
1652 printk(KERN_INFO " Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1653
1654 /* Check for available space */
1655 if(i2ob_dev_count>=MAX_I2OB<<4)
1656 {
1657 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
1658 return;
1659 }
1660 for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
1661 {
1662 if(!i2ob_dev[unit].i2odev)
1663 break;
1664 }
1665
1666 if(i2o_claim_device(d, &i2o_block_handler))
1667 {
1668 printk(KERN_INFO
1669 "i2o_block: Unable to claim device. Installation aborted\n");
1670 return;
1671 }
1672
1673 dev = &i2ob_dev[unit];
1674 dev->i2odev = d;
1675 dev->controller = c;
1676 dev->tid = d->lct_data.tid;
1677
1678 if(i2ob_install_device(c,d,unit))
1679 printk(KERN_ERR "i2o_block: Could not install new device\n");
1680 else
1681 {
1682 i2ob_dev_count++;
1683 i2o_device_notify_on(d, &i2o_block_handler);
1684 }
1685
1686 i2o_release_device(d, &i2o_block_handler);
1687
1688 return;
1689 }
1690
1691 /*
1692 * Deleted device notification handler. Called when a device we
1693 * are talking to has been deleted by the user or some other
1694 * mysterious fource outside the kernel.
1695 */
1696 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1697 {
1698 int unit = 0;
1699 int i = 0;
1700 unsigned long flags;
1701
1702 spin_lock_irqsave(&io_request_lock, flags);
1703
1704 /*
1705 * Need to do this...we somtimes get two events from the IRTOS
1706 * in a row and that causes lots of problems.
1707 */
1708 i2o_device_notify_off(d, &i2o_block_handler);
1709
1710 printk(KERN_INFO "I2O Block Device Deleted\n");
1711
1712 for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1713 {
1714 if(i2ob_dev[unit].i2odev == d)
1715 {
1716 printk(KERN_INFO " /dev/%s: Controller %d Tid %d\n",
1717 d->dev_name, c->unit, d->lct_data.tid);
1718 break;
1719 }
1720 }
1721 if(unit >= MAX_I2OB<<4)
1722 {
1723 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1724 spin_unlock_irqrestore(&io_request_lock, flags);
1725 return;
1726 }
1727
1728 /*
1729 * This will force errors when i2ob_get_queue() is called
1730 * by the kenrel.
1731 */
1732 i2ob_dev[unit].req_queue = NULL;
1733 for(i = unit; i <= unit+15; i++)
1734 {
1735 i2ob_dev[i].i2odev = NULL;
1736 i2ob_sizes[i] = 0;
1737 i2ob_hardsizes[i] = 0;
1738 i2ob_max_sectors[i] = 0;
1739 i2ob[i].nr_sects = 0;
1740 i2ob_gendisk.part[i].nr_sects = 0;
1741 }
1742 spin_unlock_irqrestore(&io_request_lock, flags);
1743
1744 /*
1745 * Sync the device...this will force all outstanding I/Os
1746 * to attempt to complete, thus causing error messages.
1747 * We have to do this as the user could immediatelly create
1748 * a new volume that gets assigned the same minor number.
1749 * If there are still outstanding writes to the device,
1750 * that could cause data corruption on the new volume!
1751 *
1752 * The truth is that deleting a volume that you are currently
1753 * accessing will do _bad things_ to your system. This
1754 * handler will keep it from crashing, but must probably
1755 * you'll have to do a 'reboot' to get the system running
1756 * properly. Deleting disks you are using is dumb.
1757 * Umount them first and all will be good!
1758 *
1759 * It's not this driver's job to protect the system from
1760 * dumb user mistakes :)
1761 */
1762 if(i2ob_dev[unit].refcnt)
1763 fsync_dev(MKDEV(MAJOR_NR,unit));
1764
1765 /*
1766 * Decrease usage count for module
1767 */
1768 while(i2ob_dev[unit].refcnt--)
1769 MOD_DEC_USE_COUNT;
1770
1771 i2ob_dev[unit].refcnt = 0;
1772
1773 i2ob_dev[i].tid = 0;
1774
1775 /*
1776 * Do we need this?
1777 * The media didn't really change...the device is just gone
1778 */
1779 i2ob_media_change_flag[unit] = 1;
1780
1781 i2ob_dev_count--;
1782 }
1783
1784 /*
1785 * Have we seen a media change ?
1786 */
1787 static int i2ob_media_change(kdev_t dev)
1788 {
1789 int i=MINOR(dev);
1790 i>>=4;
1791 if(i2ob_media_change_flag[i])
1792 {
1793 i2ob_media_change_flag[i]=0;
1794 return 1;
1795 }
1796 return 0;
1797 }
1798
1799 static int i2ob_revalidate(kdev_t dev)
1800 {
1801 return do_i2ob_revalidate(dev, 0);
1802 }
1803
1804 /*
1805 * Reboot notifier. This is called by i2o_core when the system
1806 * shuts down.
1807 */
1808 static void i2ob_reboot_event(void)
1809 {
1810 int i;
1811
1812 for(i=0;i<MAX_I2OB;i++)
1813 {
1814 struct i2ob_device *dev=&i2ob_dev[(i<<4)];
1815
1816 if(dev->refcnt!=0)
1817 {
1818 /*
1819 * Flush the onboard cache
1820 */
1821 u32 msg[5];
1822 int *query_done = &dev->done_flag;
1823 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1824 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1825 msg[2] = i2ob_context|0x40000000;
1826 msg[3] = (u32)query_done;
1827 msg[4] = 60<<16;
1828
1829 DEBUG("Flushing...");
1830 i2o_post_wait(dev->controller, msg, 20, 60);
1831
1832 DEBUG("Unlocking...");
1833 /*
1834 * Unlock the media
1835 */
1836 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1837 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1838 msg[2] = i2ob_context|0x40000000;
1839 msg[3] = (u32)query_done;
1840 msg[4] = -1;
1841 i2o_post_wait(dev->controller, msg, 20, 2);
1842
1843 DEBUG("Unlocked.\n");
1844 }
1845 }
1846 }
1847
1848 static struct block_device_operations i2ob_fops =
1849 {
1850 open: i2ob_open,
1851 release: i2ob_release,
1852 ioctl: i2ob_ioctl,
1853 check_media_change: i2ob_media_change,
1854 revalidate: i2ob_revalidate,
1855 };
1856
1857 static struct gendisk i2ob_gendisk =
1858 {
1859 major: MAJOR_NR,
1860 major_name: "i2o/hd",
1861 minor_shift: 4,
1862 max_p: 1<<4,
1863 part: i2ob,
1864 sizes: i2ob_sizes,
1865 nr_real: MAX_I2OB,
1866 fops: &i2ob_fops,
1867 };
1868
1869
1870 /*
1871 * And here should be modules and kernel interface
1872 * (Just smiley confuses emacs :-)
1873 */
1874
1875 #ifdef MODULE
1876 #define i2o_block_init init_module
1877 #endif
1878
1879 int i2o_block_init(void)
1880 {
1881 int i;
1882
1883 printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1884 printk(KERN_INFO " (c) Copyright 1999-2001 Red Hat Software.\n");
1885
1886 /*
1887 * Register the block device interfaces
1888 */
1889
1890 if (register_blkdev(MAJOR_NR, "i2o_block", &i2ob_fops)) {
1891 printk(KERN_ERR "Unable to get major number %d for i2o_block\n",
1892 MAJOR_NR);
1893 return -EIO;
1894 }
1895 #ifdef MODULE
1896 printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1897 #endif
1898
1899 /*
1900 * Now fill in the boiler plate
1901 */
1902
1903 blksize_size[MAJOR_NR] = i2ob_blksizes;
1904 hardsect_size[MAJOR_NR] = i2ob_hardsizes;
1905 blk_size[MAJOR_NR] = i2ob_sizes;
1906 max_sectors[MAJOR_NR] = i2ob_max_sectors;
1907 blk_dev[MAJOR_NR].queue = i2ob_get_queue;
1908
1909 blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request);
1910 blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
1911
1912 for (i = 0; i < MAX_I2OB << 4; i++) {
1913 i2ob_dev[i].refcnt = 0;
1914 i2ob_dev[i].flags = 0;
1915 i2ob_dev[i].controller = NULL;
1916 i2ob_dev[i].i2odev = NULL;
1917 i2ob_dev[i].tid = 0;
1918 i2ob_dev[i].head = NULL;
1919 i2ob_dev[i].tail = NULL;
1920 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1921 i2ob_blksizes[i] = 1024;
1922 i2ob_max_sectors[i] = 2;
1923 }
1924
1925 /*
1926 * Set up the queue
1927 */
1928 for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1929 {
1930 i2ob_queues[i] = NULL;
1931 }
1932
1933 /*
1934 * Timers
1935 */
1936
1937 init_timer(&i2ob_timer);
1938 i2ob_timer.function = i2ob_timer_handler;
1939 i2ob_timer.data = 0;
1940
1941 /*
1942 * Register the OSM handler as we will need this to probe for
1943 * drives, geometry and other goodies.
1944 */
1945
1946 if(i2o_install_handler(&i2o_block_handler)<0)
1947 {
1948 unregister_blkdev(MAJOR_NR, "i2o_block");
1949 blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
1950 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1951 return -EINVAL;
1952 }
1953 i2ob_context = i2o_block_handler.context;
1954
1955 /*
1956 * Initialize event handling thread
1957 */
1958 init_MUTEX_LOCKED(&i2ob_evt_sem);
1959 evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1960 if(evt_pid < 0)
1961 {
1962 printk(KERN_ERR
1963 "i2o_block: Could not initialize event thread. Aborting\n");
1964 i2o_remove_handler(&i2o_block_handler);
1965 return 0;
1966 }
1967
1968 /*
1969 * Finally see what is actually plugged in to our controllers
1970 */
1971 for (i = 0; i < MAX_I2OB; i++)
1972 register_disk(&i2ob_gendisk, MKDEV(MAJOR_NR,i<<4), 1<<4,
1973 &i2ob_fops, 0);
1974 i2ob_probe();
1975
1976 /*
1977 * Adding i2ob_gendisk into the gendisk list.
1978 */
1979 add_gendisk(&i2ob_gendisk);
1980
1981 return 0;
1982 }
1983
#ifdef MODULE

EXPORT_NO_SYMBOLS;
MODULE_AUTHOR("Red Hat Software");
MODULE_DESCRIPTION("I2O Block Device OSM");

/*
 *	Module unload: stop the event thread, stop listening for
 *	device notifications and events, remove the OSM handler and
 *	give back the block major, the request queues and the gendisk
 *	entry.
 */
void cleanup_module(void)
{
	int i;

	if(evt_running) {
		printk(KERN_INFO "Killing I2O block threads...");
		i = kill_proc(evt_pid, SIGTERM, 1);
		if(!i) {
			printk("waiting...");
		}
		/* Be sure it died */
		wait_for_completion(&i2ob_thread_dead);
		printk("done.\n");
	}

	/*
	 * Unregister for updates from any devices..otherwise we still
	 * get them and the core jumps to random memory :O
	 */
	if(i2ob_dev_count) {
		struct i2o_device *d;
		for(i = 0; i < MAX_I2OB; i++)
			if((d=i2ob_dev[i<<4].i2odev)) {
				i2o_device_notify_off(d, &i2o_block_handler);
				/* An event mask of 0 switches events off */
				i2o_event_register(d->controller, d->lct_data.tid,
					i2ob_context, i<<4, 0);
			}
	}

	/*
	 *	We may get further callbacks for ourself. The i2o_core
	 *	code handles this case reasonably sanely. The problem here
	 *	is we shouldn't get them .. but a couple of cards feel
	 *	obliged to tell us stuff we dont care about.
	 *
	 *	This isnt ideal at all but will do for now.
	 */

	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_timeout(HZ);

	/*
	 *	Flush the OSM
	 */

	i2o_remove_handler(&i2o_block_handler);

	/*
	 *	Return the block device
	 */
	if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
		printk("i2o_block: cleanup_module failed\n");

	/*
	 *	free request queue
	 */
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));

	/*
	 *	Free the per-IOP queues as well. i2ob_init_iop() allocates
	 *	one with kmalloc() for every controller that has a block
	 *	device behind it, and they would otherwise be leaked on
	 *	every unload.
	 */
	for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
	{
		if(i2ob_queues[i] == NULL)
			continue;
		blk_cleanup_queue(&i2ob_queues[i]->req_queue);
		kfree(i2ob_queues[i]);
		i2ob_queues[i] = NULL;
	}

	del_gendisk(&i2ob_gendisk);
}
#endif
2052