File: /usr/src/linux/fs/reiserfs/inode.c

1     /*
2      * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3      */
4     
5     #include <linux/config.h>
6     #include <linux/sched.h>
7     #include <linux/reiserfs_fs.h>
8     #include <linux/locks.h>
9     #include <linux/smp_lock.h>
10     #include <asm/uaccess.h>
11     
/*
 * Values for the 'create' argument of reiserfs_get_block.  Apart from
 * GET_BLOCK_NO_CREATE (no bits set) these are bit flags that may be or'ed
 * together.
 */
#define GET_BLOCK_NO_CREATE   0x0 /* don't create new blocks or convert tails */
#define GET_BLOCK_CREATE      0x1 /* add anything you need to find block */
#define GET_BLOCK_NO_HOLE     0x2 /* return -ENOENT for file holes */
#define GET_BLOCK_READ_DIRECT 0x4 /* read the tail if indirect item not found */
#define GET_BLOCK_NO_ISEM     0x8 /* i_sem is not held, don't preallocate */
18     
19     //
20     // initially this function was derived from minix or ext2's analog and
21     // evolved as the prototype did
22     //
23     void reiserfs_delete_inode (struct inode * inode)
24     {
25         int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2; 
26         int windex ;
27         struct reiserfs_transaction_handle th ;
28     
29       
30         lock_kernel() ; 
31     
32         /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
33         if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
34     	down (&inode->i_sem); 
35     
36     	journal_begin(&th, inode->i_sb, jbegin_count) ;
37     	windex = push_journal_writer("delete_inode") ;
38     
39     	reiserfs_delete_object (&th, inode);
40     	pop_journal_writer(windex) ;
41     	reiserfs_release_objectid (&th, inode->i_ino);
42     
43     	journal_end(&th, inode->i_sb, jbegin_count) ;
44     
45     	up (&inode->i_sem); 
46         } else {
47     	/* no object items are in the tree */
48     	;
49         }
50         clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
51         inode->i_blocks = 0;
52         unlock_kernel() ;
53     }
54     
55     static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, 
56     			   loff_t offset, int type, int length)
57     {
58       key->version = version;
59     
60       key->on_disk_key.k_dir_id = dirid;
61       key->on_disk_key.k_objectid = objectid;
62       set_cpu_key_k_offset (key, offset);
63       set_cpu_key_k_type (key, type);  
64       key->key_length = length;
65     }
66     
67     
68     /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
69        offset and type of key */
70     void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
71     		   int type, int length)
72     {
73       _make_cpu_key (key, inode_items_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
74     					  le32_to_cpu (INODE_PKEY (inode)->k_objectid), 
75     		 offset, type, length);
76     }
77     
78     
79     //
80     // when key is 0, do not set version and short key
81     //
82     inline void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version,
83     			       loff_t offset, int type, int length, int entry_count/*or ih_free_space*/)
84     {
85         if (key) {
86     	ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
87     	ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
88         }
89         ih->ih_version = cpu_to_le16 (version);
90         set_le_ih_k_offset (ih, offset);
91         set_le_ih_k_type (ih, type);
92         ih->ih_item_len = cpu_to_le16 (length);
93         /*    set_ih_free_space (ih, 0);*/
94         // for directory items it is entry count, for directs and stat
95         // datas - 0xffff, for indirects - 0
96         ih->u.ih_entry_count = cpu_to_le16 (entry_count);
97     }
98     
99     static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
100         struct inode *jinode = &(SB_JOURNAL(inode->i_sb)->j_dummy_inode) ;
101     
102         buffer_insert_inode_queue(bh, jinode) ;
103     }
104     
105     //
106     // FIXME: we might cache recently accessed indirect item (or at least
107     // first 15 pointers, just like ext2 does)
108     
109     // Ugh.  Not too eager for that....
110     //  I cut the code until such time as I see a convincing argument (benchmark).
111     // I don't want a bloated inode struct..., and I don't like code complexity....
112     
113     /* cutting the code is fine, since it really isn't in use yet and is easy
114     ** to add back in.  But, Vladimir has a really good idea here.  Think
115     ** about what happens when reading a file.  For each page,
116     ** the VFS layer calls reiserfs_readpage, which searches the tree to find
117     ** an indirect item.  This indirect item has X number of pointers, where
118     ** X is a big number if we've done the block allocation right.  But,
119     ** we only use one or two of these pointers during each call to readpage,
120     ** needlessly re-searching again later on.
121     **
122     ** The size of the cache could be dynamic based on the size of the file.
123     **
124     ** I'd also like to see us cache the location of the stat data item, since
125     ** we are needlessly re-searching for that frequently.
126     **
127     ** --chris
128     */
129     
130     /* If this page has a file tail in it, and
131     ** it was read in by get_block_create_0, the page data is valid,
132     ** but tail is still sitting in a direct item, and we can't write to
133     ** it.  So, look through this page, and check all the mapped buffers
134     ** to make sure they have valid block numbers.  Any that don't need
135     ** to be unmapped, so that block_prepare_write will correctly call
136     ** reiserfs_get_block to convert the tail into an unformatted node
137     */
138     static inline void fix_tail_page_for_writing(struct page *page) {
139         struct buffer_head *head, *next, *bh ;
140     
141         if (page && page->buffers) {
142     	head = page->buffers ;
143     	bh = head ;
144     	do {
145     	    next = bh->b_this_page ;
146     	    if (buffer_mapped(bh) && bh->b_blocknr == 0) {
147     	        reiserfs_unmap_buffer(bh) ;
148     	    }
149     	    bh = next ;
150     	} while (bh != head) ;
151         }
152     }
153     
154     
155     
156     
157     /* we need to allocate a block for new unformatted node.  Try to figure out
158        what point in bitmap reiserfs_new_blocknrs should start from. */
159     static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih,
160     			     __u32 * item, int pos_in_item)
161     {
162       if (!is_indirect_le_ih (ih))
163     	 /* something more complicated could be here */
164     	 return bh->b_blocknr;
165     
166       /* for indirect item: go to left and look for the first non-hole entry in
167     	  the indirect item */
168       if (pos_in_item == I_UNFM_NUM (ih))
169     	 pos_in_item --;
170       while (pos_in_item >= 0) {
171     	 if (item [pos_in_item])
172     		return item [pos_in_item];
173     	 pos_in_item --;
174       }
175       return bh->b_blocknr;
176     }
177     
178     
179     /* reiserfs_get_block does not need to allocate a block only if it has been
180        done already or non-hole position has been found in the indirect item */
181     static inline int allocation_needed (int retval, b_blocknr_t allocated, 
182     				     struct item_head * ih,
183     				     __u32 * item, int pos_in_item)
184     {
185       if (allocated)
186     	 return 0;
187       if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && item[pos_in_item])
188     	 return 0;
189       return 1;
190     }
191     
192     static inline int indirect_item_found (int retval, struct item_head * ih)
193     {
194       return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
195     }
196     
197     
198     static inline void set_block_dev_mapped (struct buffer_head * bh, 
199     					 b_blocknr_t block, struct inode * inode)
200     {
201       bh->b_dev = inode->i_dev;
202       bh->b_blocknr = block;
203       bh->b_state |= (1UL << BH_Mapped);
204     }
205     
206     
207     //
208     // files which were created in the earlier version can not be longer,
209     // than 2 gb
210     //
211     int file_capable (struct inode * inode, long block)
212     {
213         if (inode_items_version (inode) != ITEM_VERSION_1 || // it is new file.
214     	block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
215     	return 1;
216     
217         return 0;
218     }
219     
220     /*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
221     				struct inode *inode, struct path *path) {
222       struct super_block *s = th->t_super ;
223       int len = th->t_blocks_allocated ;
224     
225       pathrelse(path) ;
226       reiserfs_update_sd(th, inode) ;
227       journal_end(th, s, len) ;
228       journal_begin(th, s, len) ;
229     }
230     
231     // it is called by get_block when create == 0. Returns block number
232     // for 'block'-th logical block of file. When it hits direct item it
233     // returns 0 (being called from bmap) or read direct item into piece
234     // of page (bh_result)
235     
236     // Please improve the english/clarity in the comment above, as it is
237     // hard to understand.
238     
239     static int _get_block_create_0 (struct inode * inode, long block,
240     				 struct buffer_head * bh_result,
241     				 int args)
242     {
243         INITIALIZE_PATH (path);
244         struct cpu_key key;
245         struct buffer_head * bh;
246         struct item_head * ih, tmp_ih;
247         int fs_gen ;
248         int blocknr;
249         char * p = NULL;
250         int chars;
251         int ret ;
252         int done = 0 ;
253         unsigned long offset ;
254     
255         // prepare the key to look for the 'block'-th block of file
256         make_cpu_key (&key, inode,
257     		  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
258     
259     research:
260         if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
261     	pathrelse (&path);
262             if (p)
263                 kunmap(bh_result->b_page) ;
264     	if ((args & GET_BLOCK_NO_HOLE)) {
265     	    return -ENOENT ;
266     	}
267             return 0 ;
268         }
269         
270         //
271         bh = get_last_bh (&path);
272         ih = get_ih (&path);
273         if (is_indirect_le_ih (ih)) {
274     	__u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
275     	
276     	/* FIXME: here we could cache indirect item or part of it in
277     	   the inode to avoid search_by_key in case of subsequent
278     	   access to file */
279     	blocknr = le32_to_cpu (ind_item [path.pos_in_item]);
280     	ret = 0 ;
281     	if (blocknr) {
282     	    bh_result->b_dev = inode->i_dev;
283     	    bh_result->b_blocknr = blocknr;
284     	    bh_result->b_state |= (1UL << BH_Mapped);
285     	} else if ((args & GET_BLOCK_NO_HOLE)) {
286     	    ret = -ENOENT ;
287     	}
288     	pathrelse (&path);
289             if (p)
290                 kunmap(bh_result->b_page) ;
291     	return ret ;
292         }
293     
294         // requested data are in direct item(s)
295         if (!(args & GET_BLOCK_READ_DIRECT)) {
296     	// we are called by bmap. FIXME: we can not map block of file
297     	// when it is stored in direct item(s)
298     	pathrelse (&path);	
299             if (p)
300                 kunmap(bh_result->b_page) ;
301     	return -ENOENT;
302         }
303     
304         /* if we've got a direct item, and the buffer was uptodate,
305         ** we don't want to pull data off disk again.  skip to the
306         ** end, where we map the buffer and return
307         */
308         if (buffer_uptodate(bh_result)) {
309             goto finished ;
310         }
311     
312         // read file tail into part of page
313         offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
314         fs_gen = get_generation(inode->i_sb) ;
315         copy_item_head (&tmp_ih, ih);
316     
317         /* we only want to kmap if we are reading the tail into the page.
318         ** this is not the common case, so we don't kmap until we are
319         ** sure we need to.  But, this means the item might move if
320         ** kmap schedules
321         */
322         if (!p) {
323     	p = (char *)kmap(bh_result->b_page) ;
324     	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
325     	    goto research;
326     	}
327         }
328         p += offset ;
329         memset (p, 0, inode->i_sb->s_blocksize);
330         do {
331     	if (!is_direct_le_ih (ih)) {
332     	    BUG ();
333             }
334     	/* make sure we don't read more bytes than actually exist in
335     	** the file.  This can happen in odd cases where i_size isn't
336     	** correct, and when direct item padding results in a few 
337     	** extra bytes at the end of the direct item
338     	*/
339             if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
340     	    break ;
341     	if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
342     	    chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
343     	    done = 1 ;
344     	} else {
345     	    chars = le16_to_cpu (ih->ih_item_len) - path.pos_in_item;
346     	}
347     	memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);
348     
349     	if (done) 
350     	    break ;
351     
352     	p += chars;
353     
354     	if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
355     	    // we done, if read direct item is not the last item of
356     	    // node FIXME: we could try to check right delimiting key
357     	    // to see whether direct item continues in the right
358     	    // neighbor or rely on i_size
359     	    break;
360     
361     	// update key to look for the next piece
362     	set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
363     	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
364     	    // we read something from tail, even if now we got IO_ERROR
365     	    break;
366     	bh = get_last_bh (&path);
367     	ih = get_ih (&path);
368         } while (1);
369     
370         flush_dcache_page(bh_result->b_page) ;
371         kunmap(bh_result->b_page) ;
372     
373     finished:
374         pathrelse (&path);
375         bh_result->b_blocknr = 0 ;
376         bh_result->b_dev = inode->i_dev;
377         mark_buffer_uptodate (bh_result, 1);
378         bh_result->b_state |= (1UL << BH_Mapped);
379         return 0;
380     }
381     
382     
383     // this is called to create file map. So, _get_block_create_0 will not
384     // read direct item
385     int reiserfs_bmap (struct inode * inode, long block,
386     		   struct buffer_head * bh_result, int create)
387     {
388         if (!file_capable (inode, block))
389     	return -EFBIG;
390     
391         lock_kernel() ;
392         /* do not read the direct item */
393         _get_block_create_0 (inode, block, bh_result, 0) ;
394         unlock_kernel() ;
395         return 0;
396     }
397     
398     /* special version of get_block that is only used by grab_tail_page right
399     ** now.  It is sent to block_prepare_write, and when you try to get a
400     ** block past the end of the file (or a block from a hole) it returns
401     ** -ENOENT instead of a valid buffer.  block_prepare_write expects to
402     ** be able to do i/o on the buffers returned, unless an error value
403     ** is also returned.
404     ** 
405     ** So, this allows block_prepare_write to be used for reading a single block
406     ** in a page.  Where it does not produce a valid page for holes, or past the
407     ** end of the file.  This turns out to be exactly what we need for reading
408     ** tails for conversion.
409     **
410     ** The point of the wrapper is forcing a certain value for create, even
411     ** though the VFS layer is calling this function with create==1.  If you 
412     ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, 
413     ** don't use this function.
414     */
415     static int reiserfs_get_block_create_0 (struct inode * inode, long block,
416     			struct buffer_head * bh_result, int create) {
417         return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
418     }
419     
420     /*
421     ** helper function for when reiserfs_get_block is called for a hole
422     ** but the file tail is still in a direct item
423     ** bh_result is the buffer head for the hole
424     ** tail_offset is the offset of the start of the tail in the file
425     **
426     ** This calls prepare_write, which will start a new transaction
427     ** you should not be in a transaction, or have any paths held when you
428     ** call this.
429     */
430     static int convert_tail_for_hole(struct inode *inode, 
431                                      struct buffer_head *bh_result,
432     				 loff_t tail_offset) {
433         unsigned long index ;
434         unsigned long tail_end ; 
435         unsigned long tail_start ;
436         struct page * tail_page ;
437         struct page * hole_page = bh_result->b_page ;
438         int retval = 0 ;
439     
440         if ((tail_offset & (bh_result->b_size - 1)) != 1) 
441             return -EIO ;
442     
443         /* always try to read until the end of the block */
444         tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
445         tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
446     
447         index = tail_offset >> PAGE_CACHE_SHIFT ;
448         if (index != hole_page->index) {
449     	tail_page = grab_cache_page(inode->i_mapping, index) ;
450     	retval = -ENOMEM;
451     	if (!tail_page) {
452     	    goto out ;
453     	}
454         } else {
455             tail_page = hole_page ;
456         }
457     
458         /* we don't have to make sure the conversion did not happen while
459         ** we were locking the page because anyone that could convert
460         ** must first take i_sem.
461         **
462         ** We must fix the tail page for writing because it might have buffers
463         ** that are mapped, but have a block number of 0.  This indicates tail
464         ** data that has been read directly into the page, and block_prepare_write
465         ** won't trigger a get_block in this case.
466         */
467         fix_tail_page_for_writing(tail_page) ;
468         retval = block_prepare_write(tail_page, tail_start, tail_end, 
469                                      reiserfs_get_block) ; 
470         if (retval)
471             goto unlock ;
472     
473         /* tail conversion might change the data in the page */
474         flush_dcache_page(tail_page) ;
475     
476         retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;
477     
478     unlock:
479         if (tail_page != hole_page) {
480             UnlockPage(tail_page) ;
481     	page_cache_release(tail_page) ;
482         }
483     out:
484         return retval ;
485     }
486     
487     static inline int _allocate_block(struct reiserfs_transaction_handle *th,
488                                struct inode *inode, 
489     			   b_blocknr_t *allocated_block_nr, 
490     			   unsigned long tag,
491     			   int flags) {
492       
493     #ifdef REISERFS_PREALLOCATE
494         if (!(flags & GET_BLOCK_NO_ISEM)) {
495             return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, tag);
496         }
497     #endif
498         return reiserfs_new_unf_blocknrs (th, allocated_block_nr, tag);
499     }
500     //
501     // initially this function was derived from ext2's analog and evolved
502     // as the prototype did.  You'll need to look at the ext2 version to
503     // determine which parts are derivative, if any, understanding that
504     // there are only so many ways to code to a given interface.
505     //
506     int reiserfs_get_block (struct inode * inode, long block,
507     			struct buffer_head * bh_result, int create)
508     {
509         int repeat, retval;
510         unsigned long tag;
511         b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
512         INITIALIZE_PATH(path);
513         int pos_in_item;
514         struct cpu_key key;
515         struct buffer_head * bh, * unbh = 0;
516         struct item_head * ih, tmp_ih;
517         __u32 * item;
518         int done;
519         int fs_gen;
520         int windex ;
521         struct reiserfs_transaction_handle th ;
522         /* space reserved in transaction batch: 
523             . 3 balancings in direct->indirect conversion
524             . 1 block involved into reiserfs_update_sd()
525            XXX in practically impossible worst case direct2indirect()
526            can incur (much) more that 3 balancings. */
527         int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
528         int version;
529         int transaction_started = 0 ;
530         loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
531     
532     				/* bad.... */
533         lock_kernel() ;
534         th.t_trans_id = 0 ;
535         version = inode_items_version (inode);
536     
537         if (block < 0) {
538     	unlock_kernel();
539     	return -EIO;
540         }
541     
542         if (!file_capable (inode, block)) {
543     	unlock_kernel() ;
544     	return -EFBIG;
545         }
546     
547         /* if !create, we aren't changing the FS, so we don't need to
548         ** log anything, so we don't need to start a transaction
549         */
550         if (!(create & GET_BLOCK_CREATE)) {
551     	int ret ;
552     	/* find number of block-th logical block of the file */
553     	ret = _get_block_create_0 (inode, block, bh_result, 
554     	                           create | GET_BLOCK_READ_DIRECT) ;
555     	unlock_kernel() ;
556     	return ret;
557         }
558     
559         inode->u.reiserfs_i.i_pack_on_close = 1 ;
560     
561         windex = push_journal_writer("reiserfs_get_block") ;
562       
563         /* set the key of the first byte in the 'block'-th block of file */
564         make_cpu_key (&key, inode, new_offset,
565     		  TYPE_ANY, 3/*key length*/);
566         if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
567     	journal_begin(&th, inode->i_sb, jbegin_count) ;
568     	transaction_started = 1 ;
569         }
570      research:
571     
572         retval = search_for_position_by_key (inode->i_sb, &key, &path);
573         if (retval == IO_ERROR) {
574     	retval = -EIO;
575     	goto failure;
576         }
577     	
578         bh = get_last_bh (&path);
579         ih = get_ih (&path);
580         item = get_item (&path);
581         pos_in_item = path.pos_in_item;
582     
583         fs_gen = get_generation (inode->i_sb);
584         copy_item_head (&tmp_ih, ih);
585     
586         if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
587     	/* we have to allocate block for the unformatted node */
588     	tag = find_tag (bh, ih, item, pos_in_item);
589     	if (!transaction_started) {
590     	    pathrelse(&path) ;
591     	    journal_begin(&th, inode->i_sb, jbegin_count) ;
592     	    transaction_started = 1 ;
593     	    goto research ;
594     	}
595     
596     	repeat = _allocate_block(&th, inode, &allocated_block_nr, tag, create);
597     
598     	if (repeat == NO_DISK_SPACE) {
599     	    /* restart the transaction to give the journal a chance to free
600     	    ** some blocks.  releases the path, so we have to go back to
601     	    ** research if we succeed on the second try
602     	    */
603     	    restart_transaction(&th, inode, &path) ; 
604     	    repeat = _allocate_block(&th, inode,&allocated_block_nr,tag,create);
605     
606     	    if (repeat != NO_DISK_SPACE) {
607     		goto research ;
608     	    }
609     	    retval = -ENOSPC;
610     	    goto failure;
611     	}
612     
613     	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
614     	    goto research;
615     	}
616         }
617     
618         if (indirect_item_found (retval, ih)) {
619     	b_blocknr_t unfm_ptr;
620     
621     	/* 'block'-th block is in the file already (there is
622     	   corresponding cell in some indirect item). But it may be
623     	   zero unformatted node pointer (hole) */
624     	unfm_ptr = le32_to_cpu (item[pos_in_item]);
625     	if (unfm_ptr == 0) {
626     	    /* use allocated block to plug the hole */
627     	    reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
628     	    if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
629     		reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
630     		goto research;
631     	    }
632     	    bh_result->b_state |= (1UL << BH_New);
633     	    item[pos_in_item] = cpu_to_le32 (allocated_block_nr);
634     	    unfm_ptr = allocated_block_nr;
635     	    journal_mark_dirty (&th, inode->i_sb, bh);
636     	    inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
637     	    reiserfs_update_sd(&th, inode) ;
638     	}
639     	set_block_dev_mapped(bh_result, unfm_ptr, inode);
640     	pathrelse (&path);
641     	pop_journal_writer(windex) ;
642     	if (transaction_started)
643     	    journal_end(&th, inode->i_sb, jbegin_count) ;
644     
645     	unlock_kernel() ;
646     	 
647     	/* the item was found, so new blocks were not added to the file
648     	** there is no need to make sure the inode is updated with this 
649     	** transaction
650     	*/
651     	return 0;
652         }
653     
654         if (!transaction_started) {
655     	/* if we don't pathrelse, we could vs-3050 on the buffer if
656     	** someone is waiting for it (they can't finish until the buffer
657     	** is released, we can start a new transaction until they finish)
658     	*/
659     	pathrelse(&path) ;
660     	journal_begin(&th, inode->i_sb, jbegin_count) ;
661     	transaction_started = 1 ;
662     	goto research;
663         }
664     
665         /* desired position is not found or is in the direct item. We have
666            to append file with holes up to 'block'-th block converting
667            direct items to indirect one if necessary */
668         done = 0;
669         do {
670     	if (is_statdata_le_ih (ih)) {
671     	    __u32 unp = 0;
672     	    struct cpu_key tmp_key;
673     
674     	    /* indirect item has to be inserted */
675     	    make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, 
676     			       UNFM_P_SIZE, 0/* free_space */);
677     
678     	    if (cpu_key_k_offset (&key) == 1) {
679     		/* we are going to add 'block'-th block to the file. Use
680     		   allocated block for that */
681     		unp = cpu_to_le32 (allocated_block_nr);
682     		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
683     		bh_result->b_state |= (1UL << BH_New);
684     		done = 1;
685     	    }
686     	    tmp_key = key; // ;)
687     	    set_cpu_key_k_offset (&tmp_key, 1);
688     	    PATH_LAST_POSITION(&path) ++;
689     
690     	    retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
691     	    if (retval) {
692     		reiserfs_free_block (&th, allocated_block_nr);
693     		goto failure; // retval == -ENOSPC or -EIO or -EEXIST
694     	    }
695     	    if (unp)
696     		inode->i_blocks += inode->i_sb->s_blocksize / 512;
697     	    //mark_tail_converted (inode);
698     	} else if (is_direct_le_ih (ih)) {
699     	    /* direct item has to be converted */
700     	    loff_t tail_offset;
701     
702     	    tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
703     	    if (tail_offset == cpu_key_k_offset (&key)) {
704     		/* direct item we just found fits into block we have
705                        to map. Convert it into unformatted node: use
706                        bh_result for the conversion */
707     		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
708     		unbh = bh_result;
709     		done = 1;
710     	    } else {
711     		/* we have to padd file tail stored in direct item(s)
712     		   up to block size and convert it to unformatted
713     		   node. FIXME: this should also get into page cache */
714     
715     		pathrelse(&path) ;
716     		journal_end(&th, inode->i_sb, jbegin_count) ;
717     		transaction_started = 0 ;
718     
719     		retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
720     		if (retval) {
721     		    printk("clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
722     		    if (allocated_block_nr)
723     			reiserfs_free_block (&th, allocated_block_nr);
724     		    goto failure ;
725     		}
726     		goto research ;
727     	    }
728     	    retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
729     	    /* it is important the mark_buffer_uptodate is done after
730     	    ** the direct2indirect.  The buffer might contain valid
731     	    ** data newer than the data on disk (read by readpage, changed,
732     	    ** and then sent here by writepage).  direct2indirect needs
733     	    ** to know if unbh was already up to date, so it can decide
734     	    ** if the data in unbh needs to be replaced with data from
735     	    ** the disk
736     	    */
737     	    mark_buffer_uptodate (unbh, 1);
738     	    if (retval) {
739     		reiserfs_free_block (&th, allocated_block_nr);
740     		goto failure;
741     	    }
742     	    /* we've converted the tail, so we must 
743     	    ** flush unbh before the transaction commits
744     	    */
745     	    add_to_flushlist(inode, unbh) ;
746     
747     	    /* mark it dirty now to prevent commit_write from adding
748     	    ** this buffer to the inode's dirty buffer list
749     	    */
750     	    __mark_buffer_dirty(unbh) ;
751     		  
752     	    //inode->i_blocks += inode->i_sb->s_blocksize / 512;
753     	    //mark_tail_converted (inode);
754     	} else {
755     	    /* append indirect item with holes if needed, when appending
756     	       pointer to 'block'-th block use block, which is already
757     	       allocated */
758     	    struct cpu_key tmp_key;
759     	    struct unfm_nodeinfo un = {0, 0};
760     
761     #ifdef CONFIG_REISERFS_CHECK
762     	    if (pos_in_item != le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE)
763     		reiserfs_panic (inode->i_sb, "vs-: reiserfs_get_block: "
764     				"invalid position for append");
765     #endif
766     	    /* indirect item has to be appended, set up key of that position */
767     	    make_cpu_key (&tmp_key, inode,
768     			  le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
769     			  //pos_in_item * inode->i_sb->s_blocksize,
770     			  TYPE_INDIRECT, 3);// key type is unimportant
771     		  
772     	    if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) {
773     		/* we are going to add target block to the file. Use allocated
774     		   block for that */
775     		un.unfm_nodenum = cpu_to_le32 (allocated_block_nr);
776     		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
777     		bh_result->b_state |= (1UL << BH_New);
778     		done = 1;
779     	    } else {
780     		/* paste hole to the indirect item */
781     	    }
782     	    retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE);
783     	    if (retval) {
784     		reiserfs_free_block (&th, allocated_block_nr);
785     		goto failure;
786     	    }
787     	    if (un.unfm_nodenum)
788     		inode->i_blocks += inode->i_sb->s_blocksize / 512;
789     	    //mark_tail_converted (inode);
790     	}
791     		
792     	if (done == 1)
793     	    break;
794     	 
795     	/* this loop could log more blocks than we had originally asked
796     	** for.  So, we have to allow the transaction to end if it is
797     	** too big or too full.  Update the inode so things are 
798     	** consistent if we crash before the function returns
799     	**
800     	** release the path so that anybody waiting on the path before
801     	** ending their transaction will be able to continue.
802     	*/
803     	if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
804     	  restart_transaction(&th, inode, &path) ; 
805     	}
806     	/* inserting indirect pointers for a hole can take a 
807     	** long time.  reschedule if needed
808     	*/
809     	if (current->need_resched)
810     	    schedule() ;
811     
812     	retval = search_for_position_by_key (inode->i_sb, &key, &path);
813     	if (retval == IO_ERROR) {
814     	    retval = -EIO;
815     	    goto failure;
816     	}
817     	if (retval == POSITION_FOUND) {
818     	    reiserfs_warning ("vs-825: reiserfs_get_block: "
819     			      "%k should not be found\n", &key);
820     	    retval = -EEXIST;
821     	    if (allocated_block_nr)
822     	        reiserfs_free_block (&th, allocated_block_nr);
823     	    pathrelse(&path) ;
824     	    goto failure;
825     	}
826     	bh = get_last_bh (&path);
827     	ih = get_ih (&path);
828     	item = get_item (&path);
829     	pos_in_item = path.pos_in_item;
830         } while (1);
831     
832     
833         retval = 0;
834         reiserfs_check_path(&path) ;
835     
836      failure:
837         if (transaction_started) {
838           reiserfs_update_sd(&th, inode) ;
839           journal_end(&th, inode->i_sb, jbegin_count) ;
840         }
841         pop_journal_writer(windex) ;
842         unlock_kernel() ;
843         reiserfs_check_path(&path) ;
844         return retval;
845     }
846     
847     
848     //
// BAD: new directories have stat data of the new type while all their
// other items are of the old type. The version stored in the inode
// describes the body items only, so update_stat_data cannot rely on the
// inode and has to check the version of the stat data item directly.
853     //
854     
855     // called by read_inode
856     static void init_inode (struct inode * inode, struct path * path)
857     {
858         struct buffer_head * bh;
859         struct item_head * ih;
860         __u32 rdev;
861         //int version = ITEM_VERSION_1;
862     
863         bh = PATH_PLAST_BUFFER (path);
864         ih = PATH_PITEM_HEAD (path);
865     
866     
867         copy_key (INODE_PKEY (inode), &(ih->ih_key));
868         inode->i_blksize = PAGE_SIZE;
869     
870         INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
871     
872         if (stat_data_v1 (ih)) {
873     	struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
874     	unsigned long blocks;
875     
876     	inode_items_version (inode) = ITEM_VERSION_1;
877     	inode->i_mode = le16_to_cpu (sd->sd_mode);
878     	inode->i_nlink = le16_to_cpu (sd->sd_nlink);
879     	inode->i_uid = le16_to_cpu (sd->sd_uid);
880     	inode->i_gid = le16_to_cpu (sd->sd_gid);
881     	inode->i_size = le32_to_cpu (sd->sd_size);
882     	inode->i_atime = le32_to_cpu (sd->sd_atime);
883     	inode->i_mtime = le32_to_cpu (sd->sd_mtime);
884     	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
885     
886     	inode->i_blocks = le32_to_cpu (sd->u.sd_blocks);
887     	inode->i_generation = INODE_PKEY (inode)->k_dir_id;
888     	blocks = (inode->i_size + 511) >> 9;
889     	blocks = _ROUND_UP (blocks, inode->i_blksize >> 9);
890     	if (inode->i_blocks > blocks) {
891     	    // there was a bug in <=3.5.23 when i_blocks could take negative
892     	    // values. Starting from 3.5.17 this value could even be stored in
893     	    // stat data. For such files we set i_blocks based on file
894     	    // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
895     	    // only updated if file's inode will ever change
896     	    inode->i_blocks = blocks;
897     	}
898     
899     	rdev = le32_to_cpu (sd->u.sd_rdev);
900     	inode->u.reiserfs_i.i_first_direct_byte = le32_to_cpu (sd->sd_first_direct_byte);
901         } else {
902     	// new stat data found, but object may have old items
903     	// (directories and symlinks)
904     	struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
905     
906     	/* both old and new directories have old keys */
907     	//version = (S_ISDIR (sd->sd_mode) ? ITEM_VERSION_1 : ITEM_VERSION_2);
908     	if (S_ISDIR (sd->sd_mode) || S_ISLNK (sd->sd_mode))
909     	    inode_items_version (inode) = ITEM_VERSION_1;
910     	else
911     	    inode_items_version (inode) = ITEM_VERSION_2;
912     	inode->i_mode = le16_to_cpu (sd->sd_mode);
913     	inode->i_nlink = le32_to_cpu (sd->sd_nlink);
914     	inode->i_uid = le32_to_cpu (sd->sd_uid);
915     	inode->i_size = le64_to_cpu (sd->sd_size);
916     	inode->i_gid = le32_to_cpu (sd->sd_gid);
917     	inode->i_mtime = le32_to_cpu (sd->sd_mtime);
918     	inode->i_atime = le32_to_cpu (sd->sd_atime);
919     	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
920     	inode->i_blocks = le32_to_cpu (sd->sd_blocks);
921     	rdev = le32_to_cpu (sd->u.sd_rdev);
922     	if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
923     	    inode->i_generation = INODE_PKEY (inode)->k_dir_id;
924     	else
925     	    inode->i_generation = le32_to_cpu( sd->u.sd_generation );
926         }
927     
928         /* nopack = 0, by default */
929         inode->u.reiserfs_i.nopack = 0;
930     
931         pathrelse (path);
932         if (S_ISREG (inode->i_mode)) {
933     	inode->i_op = &reiserfs_file_inode_operations;
934     	inode->i_fop = &reiserfs_file_operations;
935     	inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
936         } else if (S_ISDIR (inode->i_mode)) {
937     	inode->i_op = &reiserfs_dir_inode_operations;
938     	inode->i_fop = &reiserfs_dir_operations;
939         } else if (S_ISLNK (inode->i_mode)) {
940     	inode->i_op = &page_symlink_inode_operations;
941     	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
942         } else {
943     	inode->i_blocks = 0;
944     	init_special_inode(inode, inode->i_mode, rdev) ;
945         }
946     }
947     
948     
949     // update new stat data with inode fields
950     static void inode2sd (void * sd, struct inode * inode)
951     {
952         struct stat_data * sd_v2 = (struct stat_data *)sd;
953     
954         sd_v2->sd_mode = cpu_to_le16 (inode->i_mode);
955         sd_v2->sd_nlink = cpu_to_le16 (inode->i_nlink);
956         sd_v2->sd_uid = cpu_to_le32 (inode->i_uid);
957         sd_v2->sd_size = cpu_to_le64 (inode->i_size);
958         sd_v2->sd_gid = cpu_to_le32 (inode->i_gid);
959         sd_v2->sd_mtime = cpu_to_le32 (inode->i_mtime);
960         sd_v2->sd_atime = cpu_to_le32 (inode->i_atime);
961         sd_v2->sd_ctime = cpu_to_le32 (inode->i_ctime);
962         sd_v2->sd_blocks = cpu_to_le32 (inode->i_blocks);
963         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
964     	sd_v2->u.sd_rdev = cpu_to_le32 (inode->i_rdev);
965         } else {
966     	sd_v2->u.sd_generation = cpu_to_le32( inode -> i_generation );
967         }
968     }
969     
970     
971     // used to copy inode's fields to old stat data
972     static void inode2sd_v1 (void * sd, struct inode * inode)
973     {
974         struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
975     
976         sd_v1->sd_mode = cpu_to_le16 (inode->i_mode);
977         sd_v1->sd_uid = cpu_to_le16 (inode->i_uid);
978         sd_v1->sd_gid = cpu_to_le16 (inode->i_gid);
979         sd_v1->sd_nlink = cpu_to_le16 (inode->i_nlink);
980         sd_v1->sd_size = cpu_to_le32 (inode->i_size);
981         sd_v1->sd_atime = cpu_to_le32 (inode->i_atime);
982         sd_v1->sd_ctime = cpu_to_le32 (inode->i_ctime);
983         sd_v1->sd_mtime = cpu_to_le32 (inode->i_mtime);
984         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
985     	sd_v1->u.sd_rdev = cpu_to_le32 (inode->i_rdev);
986         else
987     	sd_v1->u.sd_blocks = cpu_to_le32 (inode->i_blocks);
988     
989         // Sigh. i_first_direct_byte is back
990         sd_v1->sd_first_direct_byte = cpu_to_le32 (inode->u.reiserfs_i.i_first_direct_byte);
991     }
992     
993     
994     /* NOTE, you must prepare the buffer head before sending it here,
995     ** and then log it after the call
996     */
997     static void update_stat_data (struct path * path, struct inode * inode)
998     {
999         struct buffer_head * bh;
1000         struct item_head * ih;
1001       
1002         bh = PATH_PLAST_BUFFER (path);
1003         ih = PATH_PITEM_HEAD (path);
1004     
1005         if (!is_statdata_le_ih (ih))
1006     	reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
1007     			INODE_PKEY (inode), ih);
1008       
1009         if (stat_data_v1 (ih)) {
1010     	// path points to old stat data
1011     	inode2sd_v1 (B_I_PITEM (bh, ih), inode);
1012         } else {
1013     	inode2sd (B_I_PITEM (bh, ih), inode);
1014         }
1015     
1016         return;
1017     }
1018     
1019     
1020     void reiserfs_update_sd (struct reiserfs_transaction_handle *th, 
1021     			 struct inode * inode)
1022     {
1023         struct cpu_key key;
1024         INITIALIZE_PATH(path);
1025         struct buffer_head *bh ;
1026         int fs_gen ;
1027         struct item_head *ih, tmp_ih ;
1028         int retval;
1029     
1030         make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant
1031         
1032         for(;;) {
1033     	int pos;
1034     	/* look for the object's stat data */
1035     	retval = search_item (inode->i_sb, &key, &path);
1036     	if (retval == IO_ERROR) {
1037     	    reiserfs_warning ("vs-13050: reiserfs_update_sd: "
1038     			      "i/o failure occurred trying to update %K stat data",
1039     			      &key);
1040     	    return;
1041     	}
1042     	if (retval == ITEM_NOT_FOUND) {
1043     	    pos = PATH_LAST_POSITION (&path);
1044     	    pathrelse(&path) ;
1045     	    if (inode->i_nlink == 0) {
1046     		/*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
1047     		return;
1048     	    }
1049     	    reiserfs_warning ("vs-13060: reiserfs_update_sd: "
1050     			      "stat data of object %k (nlink == %d) not found (pos %d)\n", 
1051     			      INODE_PKEY (inode), inode->i_nlink, pos);
1052     	    reiserfs_check_path(&path) ;
1053     	    return;
1054     	}
1055     	
1056     	/* sigh, prepare_for_journal might schedule.  When it schedules the
1057     	** FS might change.  We have to detect that, and loop back to the
1058     	** search if the stat data item has moved
1059     	*/
1060     	bh = get_last_bh(&path) ;
1061     	ih = get_ih(&path) ;
1062     	copy_item_head (&tmp_ih, ih);
1063     	fs_gen = get_generation (inode->i_sb);
1064     	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1065     	if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
1066     	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1067     	    continue ;	/* Stat_data item has been moved after scheduling. */
1068     	}
1069     	break;
1070         }
1071         update_stat_data (&path, inode);
1072         journal_mark_dirty(th, th->t_super, bh) ; 
1073         pathrelse (&path);
1074         return;
1075     }
1076     
/*
 * reiserfs_read_inode - looks nothing up and only marks the inode bad;
 * reiserfs_read_inode2 is the routine that reads stat data from the tree.
 */
void reiserfs_read_inode(struct inode *inode)
{
    make_bad_inode(inode) ;
}
1080     
1081     
1082     //
1083     // initially this function was derived from minix or ext2's analog and
1084     // evolved as the prototype did
1085     //
1086     
1087     /* looks for stat data in the tree, and fills up the fields of in-core
1088        inode stat data fields */
1089     void reiserfs_read_inode2 (struct inode * inode, void *p)
1090     {
1091         INITIALIZE_PATH (path_to_sd);
1092         struct cpu_key key;
1093         struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ;
1094         unsigned long dirino;
1095         int retval;
1096     
1097         if (!p) {
1098     	make_bad_inode(inode) ;
1099     	return;
1100         }
1101     
1102         dirino = args->objectid ;
1103     
1104         /* set version 1, version 2 could be used too, because stat data
1105            key is the same in both versions */
1106         key.version = ITEM_VERSION_1;
1107         key.on_disk_key.k_dir_id = dirino;
1108         key.on_disk_key.k_objectid = inode->i_ino;
1109         key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
1110         key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;
1111     
1112         /* look for the object's stat data */
1113         retval = search_item (inode->i_sb, &key, &path_to_sd);
1114         if (retval == IO_ERROR) {
1115     	reiserfs_warning ("vs-13070: reiserfs_read_inode2: "
1116     			  "i/o failure occurred trying to find stat data of %K\n",
1117     			  &key);
1118     	make_bad_inode(inode) ;
1119     	return;
1120         }
1121         if (retval != ITEM_FOUND) {
1122     	/* a stale NFS handle can trigger this without it being an error */
1123     	pathrelse (&path_to_sd);
1124     	make_bad_inode(inode) ;
1125     	return;
1126         }
1127     
1128         init_inode (inode, &path_to_sd);
1129         reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
1130     
1131     }
1132     
1133     
1134     struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key)
1135     {
1136         struct inode * inode;
1137         struct reiserfs_iget4_args args ;
1138     
1139         args.objectid = key->on_disk_key.k_dir_id ;
1140         inode = iget4 (s, key->on_disk_key.k_objectid, 0, (void *)(&args));
1141         if (!inode) 
1142     	return ERR_PTR(-ENOMEM) ;
1143     
1144         if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
1145     	/* either due to i/o error or a stale NFS handle */
1146     	iput (inode);
1147     	inode = 0;
1148         }
1149         return inode;
1150     }
1151     
1152     struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
1153                                          int len, int fhtype, int parent) {
1154         struct cpu_key key ;
1155         struct inode *inode = NULL ;
1156         struct list_head *lp;
1157         struct dentry *result;
1158     
1159         /* fhtype happens to reflect the number of u32s encoded.
1160          * due to a bug in earlier code, fhtype might indicate there
1161          * are more u32s then actually fitted.
1162          * so if fhtype seems to be more than len, reduce fhtype.
1163          * Valid types are:
1164          *   2 - objectid + dir_id - legacy support
1165          *   3 - objectid + dir_id + generation
1166          *   4 - objectid + dir_id + objectid and dirid of parent - legacy
1167          *   5 - objectid + dir_id + generation + objectid and dirid of parent
1168          *   6 - as above plus generation of directory
1169          * 6 does not fit in NFSv2 handles
1170          */
1171         if (fhtype > len) {
1172     	    if (fhtype != 6 || len != 5)
1173     		    printk(KERN_WARNING "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",
1174     			   fhtype, len);
1175     	    fhtype = 5;
1176         }
1177         if (fhtype < 2 || (parent && fhtype < 4)) 
1178     	goto out ;
1179     
1180         if (! parent) {
1181     	    /* this works for handles from old kernels because the default
1182     	    ** reiserfs generation number is the packing locality.
1183     	    */
1184     	    key.on_disk_key.k_objectid = data[0] ;
1185     	    key.on_disk_key.k_dir_id = data[1] ;
1186     	    inode = reiserfs_iget(sb, &key) ;
1187     	    if (inode && !IS_ERR(inode) && (fhtype == 3 || fhtype >= 5) &&
1188     		data[2] != inode->i_generation) {
1189     		    iput(inode) ;
1190     		    inode = NULL ;
1191     	    }
1192         } else {
1193     	    key.on_disk_key.k_objectid = data[fhtype>=5?3:2] ;
1194     	    key.on_disk_key.k_dir_id = data[fhtype>=5?4:3] ;
1195     	    inode = reiserfs_iget(sb, &key) ;
1196     	    if (inode && !IS_ERR(inode) && fhtype == 6 &&
1197     		data[5] != inode->i_generation) {
1198     		    iput(inode) ;
1199     		    inode = NULL ;
1200     	    }
1201         }
1202     out:
1203         if (IS_ERR(inode))
1204     	return ERR_PTR(PTR_ERR(inode));
1205         if (!inode)
1206             return ERR_PTR(-ESTALE) ;
1207     
1208         /* now to find a dentry.
1209          * If possible, get a well-connected one
1210          */
1211         spin_lock(&dcache_lock);
1212         for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
1213     	    result = list_entry(lp,struct dentry, d_alias);
1214     	    if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
1215     		    dget_locked(result);
1216     		    result->d_vfs_flags |= DCACHE_REFERENCED;
1217     		    spin_unlock(&dcache_lock);
1218     		    iput(inode);
1219     		    return result;
1220     	    }
1221         }
1222         spin_unlock(&dcache_lock);
1223         result = d_alloc_root(inode);
1224         if (result == NULL) {
1225     	    iput(inode);
1226     	    return ERR_PTR(-ENOMEM);
1227         }
1228         result->d_flags |= DCACHE_NFSD_DISCONNECTED;
1229         return result;
1230     
1231     }
1232     
1233     int reiserfs_dentry_to_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
1234         struct inode *inode = dentry->d_inode ;
1235         int maxlen = *lenp;
1236         
1237         if (maxlen < 3)
1238             return 255 ;
1239     
1240         data[0] = inode->i_ino ;
1241         data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1242         data[2] = inode->i_generation ;
1243         *lenp = 3 ;
1244         /* no room for directory info? return what we've stored so far */
1245         if (maxlen < 5 || ! need_parent)
1246             return 3 ;
1247     
1248         inode = dentry->d_parent->d_inode ;
1249         data[3] = inode->i_ino ;
1250         data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1251         *lenp = 5 ;
1252         if (maxlen < 6)
1253     	    return 5 ;
1254         data[5] = inode->i_generation ;
1255         *lenp = 6 ;
1256         return 6 ;
1257     }
1258     
1259     
1260     //
1261     // initially this function was derived from minix or ext2's analog and
1262     // evolved as the prototype did
1263     //
1264     /* looks for stat data, then copies fields to it, marks the buffer
1265        containing stat data as dirty */
1266     /* reiserfs inodes are never really dirty, since the dirty inode call
1267     ** always logs them.  This call allows the VFS inode marking routines
1268     ** to properly mark inodes for datasync and such, but only actually
1269     ** does something when called for a synchronous update.
1270     */
1271     void reiserfs_write_inode (struct inode * inode, int do_sync) {
1272         struct reiserfs_transaction_handle th ;
1273         int jbegin_count = 1 ;
1274     
1275         if (inode->i_sb->s_flags & MS_RDONLY) {
1276             reiserfs_warning("clm-6005: writing inode %lu on readonly FS\n", 
1277     	                  inode->i_ino) ;
1278             return ;
1279         }
1280         /* memory pressure can sometimes initiate write_inode calls with sync == 1,
1281         ** these cases are just when the system needs ram, not when the 
1282         ** inode needs to reach disk for safety, and they can safely be
1283         ** ignored because the altered inode has already been logged.
1284         */
1285         if (do_sync && !(current->flags & PF_MEMALLOC)) {
1286     	lock_kernel() ;
1287     	journal_begin(&th, inode->i_sb, jbegin_count) ;
1288     	reiserfs_update_sd (&th, inode);
1289     	journal_end_sync(&th, inode->i_sb, jbegin_count) ;
1290     	unlock_kernel() ;
1291         }
1292     }
1293     
1294     void reiserfs_dirty_inode (struct inode * inode) {
1295         struct reiserfs_transaction_handle th ;
1296     
1297         if (inode->i_sb->s_flags & MS_RDONLY) {
1298             reiserfs_warning("clm-6006: writing inode %lu on readonly FS\n", 
1299     	                  inode->i_ino) ;
1300             return ;
1301         }
1302         lock_kernel() ;
1303         journal_begin(&th, inode->i_sb, 1) ;
1304         reiserfs_update_sd (&th, inode);
1305         journal_end(&th, inode->i_sb, 1) ;
1306         unlock_kernel() ;
1307     }
1308     
1309     
/*
 * reiserfs_sync_inode - update the stat data of 'inode' within
 * transaction 'th'.  Always returns 0.
 *
 * FIXME: no need any more. right?
 */
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
{
    reiserfs_update_sd (th, inode);
    return 0;
}
1318     
1319     
1320     /* stat data of new object is inserted already, this inserts the item
1321        containing "." and ".." entries */
1322     static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, 
1323     				   struct item_head * ih, struct path * path, const struct inode * dir)
1324     {
1325         struct super_block * sb = th->t_super;
1326         char empty_dir [EMPTY_DIR_SIZE];
1327         char * body = empty_dir;
1328         struct cpu_key key;
1329         int retval;
1330         
1331         _make_cpu_key (&key, ITEM_VERSION_1, le32_to_cpu (ih->ih_key.k_dir_id),
1332     		   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);
1333         
1334         /* compose item head for new item. Directories consist of items of
1335            old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1336            is done by reiserfs_new_inode */
1337         if (old_format_only (sb)) {
1338     	make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1339     	
1340     	make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1341     				le32_to_cpu (INODE_PKEY (dir)->k_dir_id), 
1342     				le32_to_cpu (INODE_PKEY (dir)->k_objectid));
1343         } else {
1344     	make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1345     	
1346     	make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1347     			     le32_to_cpu (INODE_PKEY (dir)->k_dir_id), 
1348     			     le32_to_cpu (INODE_PKEY (dir)->k_objectid));
1349         }
1350         
1351         /* look for place in the tree for new item */
1352         retval = search_item (sb, &key, path);
1353         if (retval == IO_ERROR) {
1354     	reiserfs_warning ("vs-13080: reiserfs_new_directory: "
1355     			  "i/o failure occurred creating new directory\n");
1356     	return -EIO;
1357         }
1358         if (retval == ITEM_FOUND) {
1359     	pathrelse (path);
1360     	reiserfs_warning ("vs-13070: reiserfs_new_directory: "
1361     			  "object with this key exists (%k)", &(ih->ih_key));
1362     	return -EEXIST;
1363         }
1364     
1365         /* insert item, that is empty directory item */
1366         return reiserfs_insert_item (th, path, &key, ih, body);
1367     }
1368     
1369     
1370     /* stat data of object has been inserted, this inserts the item
1371        containing the body of symlink */
1372     static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, 
1373     				 struct item_head * ih,
1374     				 struct path * path, const char * symname, int item_len)
1375     {
1376         struct super_block * sb = th->t_super;
1377         struct cpu_key key;
1378         int retval;
1379     
1380         _make_cpu_key (&key, ITEM_VERSION_1, 
1381     		   le32_to_cpu (ih->ih_key.k_dir_id), 
1382     		   le32_to_cpu (ih->ih_key.k_objectid),
1383     		   1, TYPE_DIRECT, 3/*key length*/);
1384     
1385         make_le_item_head (ih, 0, ITEM_VERSION_1, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
1386     
1387         /* look for place in the tree for new item */
1388         retval = search_item (sb, &key, path);
1389         if (retval == IO_ERROR) {
1390     	reiserfs_warning ("vs-13080: reiserfs_new_symlinik: "
1391     			  "i/o failure occurred creating new symlink\n");
1392     	return -EIO;
1393         }
1394         if (retval == ITEM_FOUND) {
1395     	pathrelse (path);
1396     	reiserfs_warning ("vs-13080: reiserfs_new_symlink: "
1397     			  "object with this key exists (%k)", &(ih->ih_key));
1398     	return -EEXIST;
1399         }
1400     
1401         /* insert item, that is body of symlink */
1402         return reiserfs_insert_item (th, path, &key, ih, symname);
1403     }
1404     
1405     
1406     /* inserts the stat data into the tree, and then calls
1407        reiserfs_new_directory (to insert ".", ".." item if new object is
1408        directory) or reiserfs_new_symlink (to insert symlink body if new
1409        object is symlink) or nothing (if new object is regular file) */
1410     struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
1411     				   const struct inode * dir, int mode, 
1412     				   const char * symname, 
1413     				   int i_size, /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1414     						  strlen (symname) for symlinks)*/
1415     				   struct dentry *dentry, struct inode *inode, int * err)
1416     {
1417         struct super_block * sb;
1418         INITIALIZE_PATH (path_to_key);
1419         struct cpu_key key;
1420         struct item_head ih;
1421         struct stat_data sd;
1422         int retval;
1423       
1424         if (!dir || !dir->i_nlink) {
1425     	*err = -EPERM;
1426     	iput(inode) ;
1427     	return NULL;
1428         }
1429     
1430         sb = dir->i_sb;
1431         inode->i_sb = sb;
1432         inode->i_flags = 0;//inode->i_sb->s_flags;
1433     
1434         /* item head of new item */
1435         ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
1436         ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
1437         if (!ih.ih_key.k_objectid) {
1438     	iput(inode) ;
1439     	*err = -ENOMEM;
1440     	return NULL;
1441         }
1442         if (old_format_only (sb))
1443           /* not a perfect generation count, as object ids can be reused, but this
1444           ** is as good as reiserfs can do right now.
1445           ** note that the private part of inode isn't filled in yet, we have
1446           ** to use the directory.
1447           */
1448           inode->i_generation = INODE_PKEY (dir)->k_objectid;
1449         else
1450     #if defined( USE_INODE_GENERATION_COUNTER )
1451           inode->i_generation = 
1452     	le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
1453     #else
1454           inode->i_generation = ++event;
1455     #endif
1456         if (old_format_only (sb))
1457     	make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1458         else
1459     	make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1460     
1461     
1462         /* key to search for correct place for new stat data */
1463         _make_cpu_key (&key, ITEM_VERSION_2, le32_to_cpu (ih.ih_key.k_dir_id),
1464     		   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
1465     
1466         /* find proper place for inserting of stat data */
1467         retval = search_item (sb, &key, &path_to_key);
1468         if (retval == IO_ERROR) {
1469     	iput (inode);
1470     	*err = -EIO;
1471     	return NULL;
1472         }
1473         if (retval == ITEM_FOUND) {
1474     	pathrelse (&path_to_key);
1475     	iput (inode);
1476     	*err = -EEXIST;
1477     	return NULL;
1478         }
1479     
1480         /* fill stat data */
1481         inode->i_mode = mode;
1482         inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
1483         inode->i_uid = current->fsuid;
1484         if (dir->i_mode & S_ISGID) {
1485     	inode->i_gid = dir->i_gid;
1486     	if (S_ISDIR(mode))
1487     	    inode->i_mode |= S_ISGID;
1488         } else
1489     	inode->i_gid = current->fsgid;
1490     
1491         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1492         inode->i_size = i_size;
1493         inode->i_blocks = (inode->i_size + 511) >> 9;
1494         inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 : 
1495           U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;
1496     
1497         INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
1498     
1499         if (old_format_only (sb))
1500     	inode2sd_v1 (&sd, inode);
1501         else
1502     	inode2sd (&sd, inode);
1503     
1504         // these do not go to on-disk stat data
1505         inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
1506         inode->i_blksize = PAGE_SIZE;
1507         inode->i_dev = sb->s_dev;
1508       
1509         // store in in-core inode the key of stat data and version all
1510         // object items will have (directory items will have old offset
1511         // format, other new objects will consist of new items)
1512         memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
1513         if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
1514     	inode_items_version (inode) = ITEM_VERSION_1;
1515         else
1516     	inode_items_version (inode) = ITEM_VERSION_2;
1517     
1518         /* insert the stat data into the tree */
1519         retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
1520         if (retval) {
1521     	iput (inode);
1522     	*err = retval;
1523     	reiserfs_check_path(&path_to_key) ;
1524     	return NULL;
1525         }
1526     
1527         if (S_ISDIR(mode)) {
1528     	/* insert item with "." and ".." */
1529     	retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
1530         }
1531     
1532         if (S_ISLNK(mode)) {
1533     	/* insert body of symlink */
1534     	if (!old_format_only (sb))
1535     	    i_size = ROUND_UP(i_size);
1536     	retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
1537         }
1538         if (retval) {
1539           inode->i_nlink = 0;
1540     	iput (inode);
1541     	*err = retval;
1542     	reiserfs_check_path(&path_to_key) ;
1543     	return NULL;
1544         }
1545     
1546         insert_inode_hash (inode);
1547         // we do not mark inode dirty: on disk content matches to the
1548         // in-core one
1549         reiserfs_check_path(&path_to_key) ;
1550     
1551         return inode;
1552     }
1553     
1554     /*
1555     ** finds the tail page in the page cache,
1556     ** reads the last block in.
1557     **
1558     ** On success, page_result is set to a locked, pinned page, and bh_result
1559     ** is set to an up to date buffer for the last block in the file.  returns 0.
1560     **
1561     ** tail conversion is not done, so bh_result might not be valid for writing
1562     ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1563     ** trying to write the block.
1564     **
1565     ** on failure, nonzero is returned, page_result and bh_result are untouched.
1566     */
1567     static int grab_tail_page(struct inode *p_s_inode, 
1568     			  struct page **page_result, 
1569     			  struct buffer_head **bh_result) {
1570     
1571         /* we want the page with the last byte in the file,
1572         ** not the page that will hold the next byte for appending
1573         */
1574         unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
1575         unsigned long pos = 0 ;
1576         unsigned long start = 0 ;
1577         unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
1578         unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
1579         struct buffer_head *bh ;
1580         struct buffer_head *head ;
1581         struct page * page ;
1582         int error ;
1583         
1584         /* we know that we are only called with inode->i_size > 0.
1585         ** we also know that a file tail can never be as big as a block
1586         ** If i_size % blocksize == 0, our file is currently block aligned
1587         ** and it won't need converting or zeroing after a truncate.
1588         */
1589         if ((offset & (blocksize - 1)) == 0) {
1590             return -ENOENT ;
1591         }
1592         page = grab_cache_page(p_s_inode->i_mapping, index) ;
1593         error = -ENOMEM ;
1594         if (!page) {
1595             goto out ;
1596         }
1597         /* start within the page of the last block in the file */
1598         start = (offset / blocksize) * blocksize ;
1599     
1600         error = block_prepare_write(page, start, offset, 
1601     				reiserfs_get_block_create_0) ;
1602         if (error)
1603     	goto unlock ;
1604     
1605         kunmap(page) ; /* mapped by block_prepare_write */
1606     
1607         head = page->buffers ;      
1608         bh = head;
1609         do {
1610     	if (pos >= start) {
1611     	    break ;
1612     	}
1613     	bh = bh->b_this_page ;
1614     	pos += blocksize ;
1615         } while(bh != head) ;
1616     
1617         if (!buffer_uptodate(bh)) {
1618     	/* note, this should never happen, prepare_write should
1619     	** be taking care of this for us.  If the buffer isn't up to date,
1620     	** I've screwed up the code to find the buffer, or the code to
1621     	** call prepare_write
1622     	*/
1623     	reiserfs_warning("clm-6000: error reading block %lu on dev %s\n",
1624     	                  bh->b_blocknr, kdevname(bh->b_dev)) ;
1625     	error = -EIO ;
1626     	goto unlock ;
1627         }
1628         *bh_result = bh ;
1629         *page_result = page ;
1630     
1631     out:
1632         return error ;
1633     
1634     unlock:
1635         UnlockPage(page) ;
1636         page_cache_release(page) ;
1637         return error ;
1638     }
1639     
1640     /*
1641     ** vfs version of truncate file.  Must NOT be called with
1642     ** a transaction already started.
1643     **
1644     ** some code taken from block_truncate_page
1645     */
1646     void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
1647         struct reiserfs_transaction_handle th ;
1648         int windex ;
1649     
1650         /* we want the offset for the first byte after the end of the file */
1651         unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1652         unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
1653         unsigned length ;
1654         struct page *page = NULL ;
1655         int error ;
1656         struct buffer_head *bh = NULL ;
1657     
1658         if (p_s_inode->i_size > 0) {
1659             if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1660     	    // -ENOENT means we truncated past the end of the file, 
1661     	    // and get_block_create_0 could not find a block to read in,
1662     	    // which is ok.
1663     	    if (error != -ENOENT)
1664     	        reiserfs_warning("clm-6001: grab_tail_page failed %d\n", error);
1665     	    page = NULL ;
1666     	    bh = NULL ;
1667     	}
1668         }
1669     
1670         /* so, if page != NULL, we have a buffer head for the offset at 
1671         ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, 
1672         ** then we have an unformatted node.  Otherwise, we have a direct item, 
1673         ** and no zeroing is required on disk.  We zero after the truncate, 
1674         ** because the truncate might pack the item anyway 
1675         ** (it will unmap bh if it packs).
1676         */
1677         journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
1678         windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
1679         reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
1680         pop_journal_writer(windex) ;
1681         journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
1682     
1683         if (page) {
1684             length = offset & (blocksize - 1) ;
1685     	/* if we are not on a block boundary */
1686     	if (length) {
1687     	    length = blocksize - length ;
1688     	    memset((char *)kmap(page) + offset, 0, length) ;   
1689     	    flush_dcache_page(page) ;
1690     	    kunmap(page) ;
1691     	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1692     	        mark_buffer_dirty(bh) ;
1693     	    }
1694     	}
1695     	UnlockPage(page) ;
1696     	page_cache_release(page) ;
1697         }
1698     
1699         return ;
1700     }
1701     
1702     static int map_block_for_writepage(struct inode *inode, 
1703     			       struct buffer_head *bh_result, 
1704                                    unsigned long block) {
1705         struct reiserfs_transaction_handle th ;
1706         int fs_gen ;
1707         struct item_head tmp_ih ;
1708         struct item_head *ih ;
1709         struct buffer_head *bh ;
1710         __u32 *item ;
1711         struct cpu_key key ;
1712         INITIALIZE_PATH(path) ;
1713         int pos_in_item ;
1714         int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
1715         loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
1716         int retval ;
1717         int use_get_block = 0 ;
1718         int bytes_copied = 0 ;
1719         int copy_size ;
1720     
1721     start_over:
1722         lock_kernel() ;
1723         journal_begin(&th, inode->i_sb, jbegin_count) ;
1724     
1725         make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
1726     
1727     research:
1728         retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
1729         if (retval != POSITION_FOUND) {
1730             use_get_block = 1;
1731     	goto out ;
1732         } 
1733     
1734         bh = get_last_bh(&path) ;
1735         ih = get_ih(&path) ;
1736         item = get_item(&path) ;
1737         pos_in_item = path.pos_in_item ;
1738     
1739         /* we've found an unformatted node */
1740         if (indirect_item_found(retval, ih)) {
1741     	if (bytes_copied > 0) {
1742     	    reiserfs_warning("clm-6002: bytes_copied %d\n", bytes_copied) ;
1743     	}
1744             if (!item[pos_in_item]) {
1745     	    /* crap, we are writing to a hole */
1746     	    use_get_block = 1;
1747     	    goto out ;
1748     	}
1749     	set_block_dev_mapped(bh_result, le32_to_cpu(item[pos_in_item]), inode);
1750     	mark_buffer_uptodate(bh_result, 1);
1751         } else if (is_direct_le_ih(ih)) {
1752             char *p ; 
1753             p = page_address(bh_result->b_page) ;
1754             p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
1755             copy_size = le16_to_cpu(ih->ih_item_len) - pos_in_item ;
1756     
1757     	fs_gen = get_generation(inode->i_sb) ;
1758     	copy_item_head(&tmp_ih, ih) ;
1759     	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1760     	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
1761     	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1762     	    goto research;
1763     	}
1764     
1765     	memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
1766     
1767     	journal_mark_dirty(&th, inode->i_sb, bh) ;
1768     	bytes_copied += copy_size ;
1769     	set_block_dev_mapped(bh_result, 0, inode);
1770     	mark_buffer_uptodate(bh_result, 1);
1771     
1772     	/* are there still bytes left? */
1773             if (bytes_copied < bh_result->b_size && 
1774     	    (byte_offset + bytes_copied) < inode->i_size) {
1775     	    set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
1776     	    goto research ;
1777     	}
1778         } else {
1779             reiserfs_warning("clm-6003: bad item inode %lu, device %s\n", inode->i_ino, kdevname(inode->i_sb->s_dev)) ;
1780             retval = -EIO ;
1781     	goto out ;
1782         }
1783         retval = 0 ;
1784         
1785     out:
1786         pathrelse(&path) ;
1787         journal_end(&th, inode->i_sb, jbegin_count) ;
1788         unlock_kernel() ;
1789     
1790         /* this is where we fill in holes in the file. */
1791         if (use_get_block) {
1792             kmap(bh_result->b_page) ;
1793     	retval = reiserfs_get_block(inode, block, bh_result, 
1794     	                            GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
1795             kunmap(bh_result->b_page) ;
1796     	if (!retval) {
1797     	    if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
1798     	        /* get_block failed to find a mapped unformatted node. */
1799     		use_get_block = 0 ;
1800     		goto start_over ;
1801     	    }
1802     	}
1803         }
1804         return retval ;
1805     }
1806     
1807     /* helper func to get a buffer head ready for writepage to send to
1808     ** ll_rw_block
1809     */
1810     static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1811         struct buffer_head *bh ;
1812         int i;
1813         for(i = 0 ; i < nr ; i++) {
1814             bh = bhp[i] ;
1815     	lock_buffer(bh) ;
1816     	set_buffer_async_io(bh) ;
1817     	/* submit_bh doesn't care if the buffer is dirty, but nobody
1818     	** later on in the call chain will be cleaning it.  So, we
1819     	** clean the buffer here, it still gets written either way.
1820     	*/
1821     	clear_bit(BH_Dirty, &bh->b_state) ;
1822     	set_bit(BH_Uptodate, &bh->b_state) ;
1823     	submit_bh(WRITE, bh) ;
1824         }
1825     }
1826     
1827     static int reiserfs_write_full_page(struct page *page) {
1828         struct inode *inode = page->mapping->host ;
1829         unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
1830         unsigned last_offset = PAGE_CACHE_SIZE;
1831         int error = 0;
1832         unsigned long block ;
1833         unsigned cur_offset = 0 ;
1834         struct buffer_head *head, *bh ;
1835         int partial = 0 ;
1836         struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
1837         int nr = 0 ;
1838     
1839         if (!page->buffers) {
1840             block_prepare_write(page, 0, 0, NULL) ;
1841     	kunmap(page) ;
1842         }
1843         /* last page in the file, zero out any contents past the
1844         ** last byte in the file
1845         */
1846         if (page->index >= end_index) {
1847             last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1848     	/* no file contents in this page */
1849     	if (page->index >= end_index + 1 || !last_offset) {
1850     	    error =  -EIO ;
1851     	    goto fail ;
1852     	}
1853     	memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
1854     	flush_dcache_page(page) ;
1855     	kunmap(page) ;
1856         }
1857         head = page->buffers ;
1858         bh = head ;
1859         block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
1860         do {
1861     	/* if this offset in the page is outside the file */
1862     	if (cur_offset >= last_offset) {
1863     	    if (!buffer_uptodate(bh))
1864     	        partial = 1 ;
1865     	} else {
1866     	    /* fast path, buffer mapped to an unformatted node */
1867     	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1868     		arr[nr++] = bh ;
1869     	    } else {
1870     		/* buffer not mapped yet, or points to a direct item.
1871     		** search and dirty or log
1872     		*/
1873     		if ((error = map_block_for_writepage(inode, bh, block))) {
1874     		    goto fail ;
1875     		}
1876     		/* map_block_for_writepage either found an unformatted node
1877     		** and mapped it for us, or it found a direct item
1878     		** and logged the changes.  
1879     		*/
1880     		if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1881     		    arr[nr++] = bh ;
1882     		}
1883     	    }
1884     	}
1885             bh = bh->b_this_page ;
1886     	cur_offset += bh->b_size ;
1887     	block++ ;
1888         } while(bh != head) ;
1889     
1890         /* if this page only had a direct item, it is very possible for
1891         ** nr == 0 without there being any kind of error.
1892         */
1893         if (nr) {
1894             submit_bh_for_writepage(arr, nr) ;
1895         } else {
1896             UnlockPage(page) ;
1897         }
1898         if (!partial)
1899             SetPageUptodate(page) ;
1900     
1901         return 0 ;
1902     
1903     fail:
1904         if (nr) {
1905             submit_bh_for_writepage(arr, nr) ;
1906         } else {
1907             UnlockPage(page) ;
1908         }
1909         ClearPageUptodate(page) ;
1910         return error ;
1911     }
1912     
1913     //
1914     // this is exactly what 2.3.99-pre9's ext2_readpage is
1915     //
1916     static int reiserfs_readpage (struct file *f, struct page * page)
1917     {
1918         return block_read_full_page (page, reiserfs_get_block);
1919     }
1920     
1921     
1922     //
1923     // modified from ext2_writepage is
1924     //
1925     static int reiserfs_writepage (struct page * page)
1926     {
1927         struct inode *inode = page->mapping->host ;
1928         reiserfs_wait_on_write_block(inode->i_sb) ;
1929         return reiserfs_write_full_page(page) ;
1930     }
1931     
1932     
1933     //
1934     // from ext2_prepare_write, but modified
1935     //
1936     int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to) {
1937         struct inode *inode = page->mapping->host ;
1938         reiserfs_wait_on_write_block(inode->i_sb) ;
1939         fix_tail_page_for_writing(page) ;
1940         return block_prepare_write(page, from, to, reiserfs_get_block) ;
1941     }
1942     
1943     
1944     //
1945     // this is exactly what 2.3.99-pre9's ext2_bmap is
1946     //
1947     static int reiserfs_aop_bmap(struct address_space *as, long block) {
1948       return generic_block_bmap(as, block, reiserfs_bmap) ;
1949     }
1950     
1951     static int reiserfs_commit_write(struct file *f, struct page *page,
1952     			         unsigned from, unsigned to) {
1953         struct inode *inode = page->mapping->host;
1954         int ret ;
1955     
1956         reiserfs_wait_on_write_block(inode->i_sb) ;
1957         ret = generic_commit_write(f, page, from, to) ;
1958     
1959         /* we test for O_SYNC here so we can commit the transaction
1960         ** for any packed tails the file might have had
1961         */
1962         if (f->f_flags & O_SYNC) {
1963     	struct reiserfs_transaction_handle th ;
1964     	lock_kernel() ;
1965     	journal_begin(&th, inode->i_sb, 1) ;
1966     	reiserfs_prepare_for_journal(inode->i_sb, 
1967     	                             SB_BUFFER_WITH_SB(inode->i_sb), 1) ;
1968     	journal_mark_dirty(&th, inode->i_sb, SB_BUFFER_WITH_SB(inode->i_sb)) ;
1969     	journal_end_sync(&th, inode->i_sb, 1) ;
1970     	unlock_kernel() ;
1971         }
1972         return ret ;
1973     }
1974     
1975     struct address_space_operations reiserfs_address_space_operations = {
1976         writepage: reiserfs_writepage,
1977         readpage: reiserfs_readpage, 
1978         sync_page: block_sync_page,
1979         prepare_write: reiserfs_prepare_write,
1980         commit_write: reiserfs_commit_write,
1981         bmap: reiserfs_aop_bmap
1982     } ;
1983