File: /usr/src/linux/fs/reiserfs/inode.c
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 #include <linux/config.h>
6 #include <linux/sched.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/locks.h>
9 #include <linux/smp_lock.h>
10 #include <asm/uaccess.h>
11
/* args for the create parameter of reiserfs_get_block.  The values are
** bit flags (apart from GET_BLOCK_NO_CREATE, which is simply "no flags")
** and are tested with '&' by the code in this file.
*/
#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes (and for blocks that are not found) */
#define GET_BLOCK_READ_DIRECT 4 /* block lives in a direct item (tail): copy the tail into the page instead of failing with -ENOENT */
#define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */
18
19 //
20 // initially this function was derived from minix or ext2's analog and
21 // evolved as the prototype did
22 //
23 void reiserfs_delete_inode (struct inode * inode)
24 {
25 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2;
26 int windex ;
27 struct reiserfs_transaction_handle th ;
28
29
30 lock_kernel() ;
31
32 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
33 if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
34 down (&inode->i_sem);
35
36 journal_begin(&th, inode->i_sb, jbegin_count) ;
37 windex = push_journal_writer("delete_inode") ;
38
39 reiserfs_delete_object (&th, inode);
40 pop_journal_writer(windex) ;
41 reiserfs_release_objectid (&th, inode->i_ino);
42
43 journal_end(&th, inode->i_sb, jbegin_count) ;
44
45 up (&inode->i_sem);
46 } else {
47 /* no object items are in the tree */
48 ;
49 }
50 clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
51 inode->i_blocks = 0;
52 unlock_kernel() ;
53 }
54
55 static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
56 loff_t offset, int type, int length)
57 {
58 key->version = version;
59
60 key->on_disk_key.k_dir_id = dirid;
61 key->on_disk_key.k_objectid = objectid;
62 set_cpu_key_k_offset (key, offset);
63 set_cpu_key_k_type (key, type);
64 key->key_length = length;
65 }
66
67
68 /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
69 offset and type of key */
70 void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
71 int type, int length)
72 {
73 _make_cpu_key (key, inode_items_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
74 le32_to_cpu (INODE_PKEY (inode)->k_objectid),
75 offset, type, length);
76 }
77
78
79 //
80 // when key is 0, do not set version and short key
81 //
82 inline void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version,
83 loff_t offset, int type, int length, int entry_count/*or ih_free_space*/)
84 {
85 if (key) {
86 ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
87 ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
88 }
89 ih->ih_version = cpu_to_le16 (version);
90 set_le_ih_k_offset (ih, offset);
91 set_le_ih_k_type (ih, type);
92 ih->ih_item_len = cpu_to_le16 (length);
93 /* set_ih_free_space (ih, 0);*/
94 // for directory items it is entry count, for directs and stat
95 // datas - 0xffff, for indirects - 0
96 ih->u.ih_entry_count = cpu_to_le16 (entry_count);
97 }
98
99 static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
100 struct inode *jinode = &(SB_JOURNAL(inode->i_sb)->j_dummy_inode) ;
101
102 buffer_insert_inode_queue(bh, jinode) ;
103 }
104
105 //
106 // FIXME: we might cache recently accessed indirect item (or at least
107 // first 15 pointers just like ext2 does
108
109 // Ugh. Not too eager for that....
110 // I cut the code until such time as I see a convincing argument (benchmark).
111 // I don't want a bloated inode struct..., and I don't like code complexity....
112
113 /* cutting the code is fine, since it really isn't in use yet and is easy
114 ** to add back in. But, Vladimir has a really good idea here. Think
115 ** about what happens for reading a file. For each page,
116 ** The VFS layer calls reiserfs_readpage, who searches the tree to find
117 ** an indirect item. This indirect item has X number of pointers, where
118 ** X is a big number if we've done the block allocation right. But,
119 ** we only use one or two of these pointers during each call to readpage,
120 ** needlessly researching again later on.
121 **
122 ** The size of the cache could be dynamic based on the size of the file.
123 **
124 ** I'd also like to see us cache the location the stat data item, since
125 ** we are needlessly researching for that frequently.
126 **
127 ** --chris
128 */
129
130 /* If this page has a file tail in it, and
131 ** it was read in by get_block_create_0, the page data is valid,
132 ** but tail is still sitting in a direct item, and we can't write to
133 ** it. So, look through this page, and check all the mapped buffers
134 ** to make sure they have valid block numbers. Any that don't need
135 ** to be unmapped, so that block_prepare_write will correctly call
136 ** reiserfs_get_block to convert the tail into an unformatted node
137 */
138 static inline void fix_tail_page_for_writing(struct page *page) {
139 struct buffer_head *head, *next, *bh ;
140
141 if (page && page->buffers) {
142 head = page->buffers ;
143 bh = head ;
144 do {
145 next = bh->b_this_page ;
146 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
147 reiserfs_unmap_buffer(bh) ;
148 }
149 bh = next ;
150 } while (bh != head) ;
151 }
152 }
153
154
155
156
157 /* we need to allocate a block for new unformatted node. Try to figure out
158 what point in bitmap reiserfs_new_blocknrs should start from. */
159 static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih,
160 __u32 * item, int pos_in_item)
161 {
162 if (!is_indirect_le_ih (ih))
163 /* something more complicated could be here */
164 return bh->b_blocknr;
165
166 /* for indirect item: go to left and look for the first non-hole entry in
167 the indirect item */
168 if (pos_in_item == I_UNFM_NUM (ih))
169 pos_in_item --;
170 while (pos_in_item >= 0) {
171 if (item [pos_in_item])
172 return item [pos_in_item];
173 pos_in_item --;
174 }
175 return bh->b_blocknr;
176 }
177
178
179 /* reiserfs_get_block does not need to allocate a block only if it has been
180 done already or non-hole position has been found in the indirect item */
181 static inline int allocation_needed (int retval, b_blocknr_t allocated,
182 struct item_head * ih,
183 __u32 * item, int pos_in_item)
184 {
185 if (allocated)
186 return 0;
187 if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && item[pos_in_item])
188 return 0;
189 return 1;
190 }
191
192 static inline int indirect_item_found (int retval, struct item_head * ih)
193 {
194 return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
195 }
196
197
198 static inline void set_block_dev_mapped (struct buffer_head * bh,
199 b_blocknr_t block, struct inode * inode)
200 {
201 bh->b_dev = inode->i_dev;
202 bh->b_blocknr = block;
203 bh->b_state |= (1UL << BH_Mapped);
204 }
205
206
207 //
208 // files which were created in the earlier version can not be longer,
209 // than 2 gb
210 //
211 int file_capable (struct inode * inode, long block)
212 {
213 if (inode_items_version (inode) != ITEM_VERSION_1 || // it is new file.
214 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
215 return 1;
216
217 return 0;
218 }
219
220 /*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
221 struct inode *inode, struct path *path) {
222 struct super_block *s = th->t_super ;
223 int len = th->t_blocks_allocated ;
224
225 pathrelse(path) ;
226 reiserfs_update_sd(th, inode) ;
227 journal_end(th, s, len) ;
228 journal_begin(th, s, len) ;
229 }
230
231 // it is called by get_block when create == 0. Returns block number
232 // for 'block'-th logical block of file. When it hits direct item it
233 // returns 0 (being called from bmap) or read direct item into piece
234 // of page (bh_result)
235
236 // Please improve the english/clarity in the comment above, as it is
237 // hard to understand.
238
239 static int _get_block_create_0 (struct inode * inode, long block,
240 struct buffer_head * bh_result,
241 int args)
242 {
243 INITIALIZE_PATH (path);
244 struct cpu_key key;
245 struct buffer_head * bh;
246 struct item_head * ih, tmp_ih;
247 int fs_gen ;
248 int blocknr;
249 char * p = NULL;
250 int chars;
251 int ret ;
252 int done = 0 ;
253 unsigned long offset ;
254
255 // prepare the key to look for the 'block'-th block of file
256 make_cpu_key (&key, inode,
257 (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
258
259 research:
260 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
261 pathrelse (&path);
262 if (p)
263 kunmap(bh_result->b_page) ;
264 if ((args & GET_BLOCK_NO_HOLE)) {
265 return -ENOENT ;
266 }
267 return 0 ;
268 }
269
270 //
271 bh = get_last_bh (&path);
272 ih = get_ih (&path);
273 if (is_indirect_le_ih (ih)) {
274 __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
275
276 /* FIXME: here we could cache indirect item or part of it in
277 the inode to avoid search_by_key in case of subsequent
278 access to file */
279 blocknr = le32_to_cpu (ind_item [path.pos_in_item]);
280 ret = 0 ;
281 if (blocknr) {
282 bh_result->b_dev = inode->i_dev;
283 bh_result->b_blocknr = blocknr;
284 bh_result->b_state |= (1UL << BH_Mapped);
285 } else if ((args & GET_BLOCK_NO_HOLE)) {
286 ret = -ENOENT ;
287 }
288 pathrelse (&path);
289 if (p)
290 kunmap(bh_result->b_page) ;
291 return ret ;
292 }
293
294 // requested data are in direct item(s)
295 if (!(args & GET_BLOCK_READ_DIRECT)) {
296 // we are called by bmap. FIXME: we can not map block of file
297 // when it is stored in direct item(s)
298 pathrelse (&path);
299 if (p)
300 kunmap(bh_result->b_page) ;
301 return -ENOENT;
302 }
303
304 /* if we've got a direct item, and the buffer was uptodate,
305 ** we don't want to pull data off disk again. skip to the
306 ** end, where we map the buffer and return
307 */
308 if (buffer_uptodate(bh_result)) {
309 goto finished ;
310 }
311
312 // read file tail into part of page
313 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
314 fs_gen = get_generation(inode->i_sb) ;
315 copy_item_head (&tmp_ih, ih);
316
317 /* we only want to kmap if we are reading the tail into the page.
318 ** this is not the common case, so we don't kmap until we are
319 ** sure we need to. But, this means the item might move if
320 ** kmap schedules
321 */
322 if (!p) {
323 p = (char *)kmap(bh_result->b_page) ;
324 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
325 goto research;
326 }
327 }
328 p += offset ;
329 memset (p, 0, inode->i_sb->s_blocksize);
330 do {
331 if (!is_direct_le_ih (ih)) {
332 BUG ();
333 }
334 /* make sure we don't read more bytes than actually exist in
335 ** the file. This can happen in odd cases where i_size isn't
336 ** correct, and when direct item padding results in a few
337 ** extra bytes at the end of the direct item
338 */
339 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
340 break ;
341 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
342 chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
343 done = 1 ;
344 } else {
345 chars = le16_to_cpu (ih->ih_item_len) - path.pos_in_item;
346 }
347 memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);
348
349 if (done)
350 break ;
351
352 p += chars;
353
354 if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
355 // we done, if read direct item is not the last item of
356 // node FIXME: we could try to check right delimiting key
357 // to see whether direct item continues in the right
358 // neighbor or rely on i_size
359 break;
360
361 // update key to look for the next piece
362 set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
363 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
364 // we read something from tail, even if now we got IO_ERROR
365 break;
366 bh = get_last_bh (&path);
367 ih = get_ih (&path);
368 } while (1);
369
370 flush_dcache_page(bh_result->b_page) ;
371 kunmap(bh_result->b_page) ;
372
373 finished:
374 pathrelse (&path);
375 bh_result->b_blocknr = 0 ;
376 bh_result->b_dev = inode->i_dev;
377 mark_buffer_uptodate (bh_result, 1);
378 bh_result->b_state |= (1UL << BH_Mapped);
379 return 0;
380 }
381
382
383 // this is called to create file map. So, _get_block_create_0 will not
384 // read direct item
385 int reiserfs_bmap (struct inode * inode, long block,
386 struct buffer_head * bh_result, int create)
387 {
388 if (!file_capable (inode, block))
389 return -EFBIG;
390
391 lock_kernel() ;
392 /* do not read the direct item */
393 _get_block_create_0 (inode, block, bh_result, 0) ;
394 unlock_kernel() ;
395 return 0;
396 }
397
398 /* special version of get_block that is only used by grab_tail_page right
399 ** now. It is sent to block_prepare_write, and when you try to get a
400 ** block past the end of the file (or a block from a hole) it returns
401 ** -ENOENT instead of a valid buffer. block_prepare_write expects to
402 ** be able to do i/o on the buffers returned, unless an error value
403 ** is also returned.
404 **
405 ** So, this allows block_prepare_write to be used for reading a single block
406 ** in a page. Where it does not produce a valid page for holes, or past the
407 ** end of the file. This turns out to be exactly what we need for reading
408 ** tails for conversion.
409 **
410 ** The point of the wrapper is forcing a certain value for create, even
411 ** though the VFS layer is calling this function with create==1. If you
412 ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
413 ** don't use this function.
414 */
415 static int reiserfs_get_block_create_0 (struct inode * inode, long block,
416 struct buffer_head * bh_result, int create) {
417 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
418 }
419
420 /*
421 ** helper function for when reiserfs_get_block is called for a hole
422 ** but the file tail is still in a direct item
423 ** bh_result is the buffer head for the hole
424 ** tail_offset is the offset of the start of the tail in the file
425 **
426 ** This calls prepare_write, which will start a new transaction
427 ** you should not be in a transaction, or have any paths held when you
428 ** call this.
429 */
430 static int convert_tail_for_hole(struct inode *inode,
431 struct buffer_head *bh_result,
432 loff_t tail_offset) {
433 unsigned long index ;
434 unsigned long tail_end ;
435 unsigned long tail_start ;
436 struct page * tail_page ;
437 struct page * hole_page = bh_result->b_page ;
438 int retval = 0 ;
439
440 if ((tail_offset & (bh_result->b_size - 1)) != 1)
441 return -EIO ;
442
443 /* always try to read until the end of the block */
444 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
445 tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
446
447 index = tail_offset >> PAGE_CACHE_SHIFT ;
448 if (index != hole_page->index) {
449 tail_page = grab_cache_page(inode->i_mapping, index) ;
450 retval = -ENOMEM;
451 if (!tail_page) {
452 goto out ;
453 }
454 } else {
455 tail_page = hole_page ;
456 }
457
458 /* we don't have to make sure the conversion did not happen while
459 ** we were locking the page because anyone that could convert
460 ** must first take i_sem.
461 **
462 ** We must fix the tail page for writing because it might have buffers
463 ** that are mapped, but have a block number of 0. This indicates tail
464 ** data that has been read directly into the page, and block_prepare_write
465 ** won't trigger a get_block in this case.
466 */
467 fix_tail_page_for_writing(tail_page) ;
468 retval = block_prepare_write(tail_page, tail_start, tail_end,
469 reiserfs_get_block) ;
470 if (retval)
471 goto unlock ;
472
473 /* tail conversion might change the data in the page */
474 flush_dcache_page(tail_page) ;
475
476 retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;
477
478 unlock:
479 if (tail_page != hole_page) {
480 UnlockPage(tail_page) ;
481 page_cache_release(tail_page) ;
482 }
483 out:
484 return retval ;
485 }
486
487 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
488 struct inode *inode,
489 b_blocknr_t *allocated_block_nr,
490 unsigned long tag,
491 int flags) {
492
493 #ifdef REISERFS_PREALLOCATE
494 if (!(flags & GET_BLOCK_NO_ISEM)) {
495 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, tag);
496 }
497 #endif
498 return reiserfs_new_unf_blocknrs (th, allocated_block_nr, tag);
499 }
500 //
501 // initially this function was derived from ext2's analog and evolved
502 // as the prototype did. You'll need to look at the ext2 version to
503 // determine which parts are derivative, if any, understanding that
504 // there are only so many ways to code to a given interface.
505 //
506 int reiserfs_get_block (struct inode * inode, long block,
507 struct buffer_head * bh_result, int create)
508 {
509 int repeat, retval;
510 unsigned long tag;
511 b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
512 INITIALIZE_PATH(path);
513 int pos_in_item;
514 struct cpu_key key;
515 struct buffer_head * bh, * unbh = 0;
516 struct item_head * ih, tmp_ih;
517 __u32 * item;
518 int done;
519 int fs_gen;
520 int windex ;
521 struct reiserfs_transaction_handle th ;
522 /* space reserved in transaction batch:
523 . 3 balancings in direct->indirect conversion
524 . 1 block involved into reiserfs_update_sd()
525 XXX in practically impossible worst case direct2indirect()
526 can incur (much) more that 3 balancings. */
527 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
528 int version;
529 int transaction_started = 0 ;
530 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
531
532 /* bad.... */
533 lock_kernel() ;
534 th.t_trans_id = 0 ;
535 version = inode_items_version (inode);
536
537 if (block < 0) {
538 unlock_kernel();
539 return -EIO;
540 }
541
542 if (!file_capable (inode, block)) {
543 unlock_kernel() ;
544 return -EFBIG;
545 }
546
547 /* if !create, we aren't changing the FS, so we don't need to
548 ** log anything, so we don't need to start a transaction
549 */
550 if (!(create & GET_BLOCK_CREATE)) {
551 int ret ;
552 /* find number of block-th logical block of the file */
553 ret = _get_block_create_0 (inode, block, bh_result,
554 create | GET_BLOCK_READ_DIRECT) ;
555 unlock_kernel() ;
556 return ret;
557 }
558
559 inode->u.reiserfs_i.i_pack_on_close = 1 ;
560
561 windex = push_journal_writer("reiserfs_get_block") ;
562
563 /* set the key of the first byte in the 'block'-th block of file */
564 make_cpu_key (&key, inode, new_offset,
565 TYPE_ANY, 3/*key length*/);
566 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
567 journal_begin(&th, inode->i_sb, jbegin_count) ;
568 transaction_started = 1 ;
569 }
570 research:
571
572 retval = search_for_position_by_key (inode->i_sb, &key, &path);
573 if (retval == IO_ERROR) {
574 retval = -EIO;
575 goto failure;
576 }
577
578 bh = get_last_bh (&path);
579 ih = get_ih (&path);
580 item = get_item (&path);
581 pos_in_item = path.pos_in_item;
582
583 fs_gen = get_generation (inode->i_sb);
584 copy_item_head (&tmp_ih, ih);
585
586 if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
587 /* we have to allocate block for the unformatted node */
588 tag = find_tag (bh, ih, item, pos_in_item);
589 if (!transaction_started) {
590 pathrelse(&path) ;
591 journal_begin(&th, inode->i_sb, jbegin_count) ;
592 transaction_started = 1 ;
593 goto research ;
594 }
595
596 repeat = _allocate_block(&th, inode, &allocated_block_nr, tag, create);
597
598 if (repeat == NO_DISK_SPACE) {
599 /* restart the transaction to give the journal a chance to free
600 ** some blocks. releases the path, so we have to go back to
601 ** research if we succeed on the second try
602 */
603 restart_transaction(&th, inode, &path) ;
604 repeat = _allocate_block(&th, inode,&allocated_block_nr,tag,create);
605
606 if (repeat != NO_DISK_SPACE) {
607 goto research ;
608 }
609 retval = -ENOSPC;
610 goto failure;
611 }
612
613 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
614 goto research;
615 }
616 }
617
618 if (indirect_item_found (retval, ih)) {
619 b_blocknr_t unfm_ptr;
620
621 /* 'block'-th block is in the file already (there is
622 corresponding cell in some indirect item). But it may be
623 zero unformatted node pointer (hole) */
624 unfm_ptr = le32_to_cpu (item[pos_in_item]);
625 if (unfm_ptr == 0) {
626 /* use allocated block to plug the hole */
627 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
628 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
629 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
630 goto research;
631 }
632 bh_result->b_state |= (1UL << BH_New);
633 item[pos_in_item] = cpu_to_le32 (allocated_block_nr);
634 unfm_ptr = allocated_block_nr;
635 journal_mark_dirty (&th, inode->i_sb, bh);
636 inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
637 reiserfs_update_sd(&th, inode) ;
638 }
639 set_block_dev_mapped(bh_result, unfm_ptr, inode);
640 pathrelse (&path);
641 pop_journal_writer(windex) ;
642 if (transaction_started)
643 journal_end(&th, inode->i_sb, jbegin_count) ;
644
645 unlock_kernel() ;
646
647 /* the item was found, so new blocks were not added to the file
648 ** there is no need to make sure the inode is updated with this
649 ** transaction
650 */
651 return 0;
652 }
653
654 if (!transaction_started) {
655 /* if we don't pathrelse, we could vs-3050 on the buffer if
656 ** someone is waiting for it (they can't finish until the buffer
657 ** is released, we can start a new transaction until they finish)
658 */
659 pathrelse(&path) ;
660 journal_begin(&th, inode->i_sb, jbegin_count) ;
661 transaction_started = 1 ;
662 goto research;
663 }
664
665 /* desired position is not found or is in the direct item. We have
666 to append file with holes up to 'block'-th block converting
667 direct items to indirect one if necessary */
668 done = 0;
669 do {
670 if (is_statdata_le_ih (ih)) {
671 __u32 unp = 0;
672 struct cpu_key tmp_key;
673
674 /* indirect item has to be inserted */
675 make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
676 UNFM_P_SIZE, 0/* free_space */);
677
678 if (cpu_key_k_offset (&key) == 1) {
679 /* we are going to add 'block'-th block to the file. Use
680 allocated block for that */
681 unp = cpu_to_le32 (allocated_block_nr);
682 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
683 bh_result->b_state |= (1UL << BH_New);
684 done = 1;
685 }
686 tmp_key = key; // ;)
687 set_cpu_key_k_offset (&tmp_key, 1);
688 PATH_LAST_POSITION(&path) ++;
689
690 retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
691 if (retval) {
692 reiserfs_free_block (&th, allocated_block_nr);
693 goto failure; // retval == -ENOSPC or -EIO or -EEXIST
694 }
695 if (unp)
696 inode->i_blocks += inode->i_sb->s_blocksize / 512;
697 //mark_tail_converted (inode);
698 } else if (is_direct_le_ih (ih)) {
699 /* direct item has to be converted */
700 loff_t tail_offset;
701
702 tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
703 if (tail_offset == cpu_key_k_offset (&key)) {
704 /* direct item we just found fits into block we have
705 to map. Convert it into unformatted node: use
706 bh_result for the conversion */
707 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
708 unbh = bh_result;
709 done = 1;
710 } else {
711 /* we have to padd file tail stored in direct item(s)
712 up to block size and convert it to unformatted
713 node. FIXME: this should also get into page cache */
714
715 pathrelse(&path) ;
716 journal_end(&th, inode->i_sb, jbegin_count) ;
717 transaction_started = 0 ;
718
719 retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
720 if (retval) {
721 printk("clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
722 if (allocated_block_nr)
723 reiserfs_free_block (&th, allocated_block_nr);
724 goto failure ;
725 }
726 goto research ;
727 }
728 retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
729 /* it is important the mark_buffer_uptodate is done after
730 ** the direct2indirect. The buffer might contain valid
731 ** data newer than the data on disk (read by readpage, changed,
732 ** and then sent here by writepage). direct2indirect needs
733 ** to know if unbh was already up to date, so it can decide
734 ** if the data in unbh needs to be replaced with data from
735 ** the disk
736 */
737 mark_buffer_uptodate (unbh, 1);
738 if (retval) {
739 reiserfs_free_block (&th, allocated_block_nr);
740 goto failure;
741 }
742 /* we've converted the tail, so we must
743 ** flush unbh before the transaction commits
744 */
745 add_to_flushlist(inode, unbh) ;
746
747 /* mark it dirty now to prevent commit_write from adding
748 ** this buffer to the inode's dirty buffer list
749 */
750 __mark_buffer_dirty(unbh) ;
751
752 //inode->i_blocks += inode->i_sb->s_blocksize / 512;
753 //mark_tail_converted (inode);
754 } else {
755 /* append indirect item with holes if needed, when appending
756 pointer to 'block'-th block use block, which is already
757 allocated */
758 struct cpu_key tmp_key;
759 struct unfm_nodeinfo un = {0, 0};
760
761 #ifdef CONFIG_REISERFS_CHECK
762 if (pos_in_item != le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE)
763 reiserfs_panic (inode->i_sb, "vs-: reiserfs_get_block: "
764 "invalid position for append");
765 #endif
766 /* indirect item has to be appended, set up key of that position */
767 make_cpu_key (&tmp_key, inode,
768 le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
769 //pos_in_item * inode->i_sb->s_blocksize,
770 TYPE_INDIRECT, 3);// key type is unimportant
771
772 if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) {
773 /* we are going to add target block to the file. Use allocated
774 block for that */
775 un.unfm_nodenum = cpu_to_le32 (allocated_block_nr);
776 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
777 bh_result->b_state |= (1UL << BH_New);
778 done = 1;
779 } else {
780 /* paste hole to the indirect item */
781 }
782 retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE);
783 if (retval) {
784 reiserfs_free_block (&th, allocated_block_nr);
785 goto failure;
786 }
787 if (un.unfm_nodenum)
788 inode->i_blocks += inode->i_sb->s_blocksize / 512;
789 //mark_tail_converted (inode);
790 }
791
792 if (done == 1)
793 break;
794
795 /* this loop could log more blocks than we had originally asked
796 ** for. So, we have to allow the transaction to end if it is
797 ** too big or too full. Update the inode so things are
798 ** consistent if we crash before the function returns
799 **
800 ** release the path so that anybody waiting on the path before
801 ** ending their transaction will be able to continue.
802 */
803 if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
804 restart_transaction(&th, inode, &path) ;
805 }
806 /* inserting indirect pointers for a hole can take a
807 ** long time. reschedule if needed
808 */
809 if (current->need_resched)
810 schedule() ;
811
812 retval = search_for_position_by_key (inode->i_sb, &key, &path);
813 if (retval == IO_ERROR) {
814 retval = -EIO;
815 goto failure;
816 }
817 if (retval == POSITION_FOUND) {
818 reiserfs_warning ("vs-825: reiserfs_get_block: "
819 "%k should not be found\n", &key);
820 retval = -EEXIST;
821 if (allocated_block_nr)
822 reiserfs_free_block (&th, allocated_block_nr);
823 pathrelse(&path) ;
824 goto failure;
825 }
826 bh = get_last_bh (&path);
827 ih = get_ih (&path);
828 item = get_item (&path);
829 pos_in_item = path.pos_in_item;
830 } while (1);
831
832
833 retval = 0;
834 reiserfs_check_path(&path) ;
835
836 failure:
837 if (transaction_started) {
838 reiserfs_update_sd(&th, inode) ;
839 journal_end(&th, inode->i_sb, jbegin_count) ;
840 }
841 pop_journal_writer(windex) ;
842 unlock_kernel() ;
843 reiserfs_check_path(&path) ;
844 return retval;
845 }
846
847
848 //
849 // BAD: new directories have stat data of new type and all other items
850 // of old type. Version stored in the inode says about body items, so
851 // in update_stat_data we can not rely on inode, but have to check
852 // item version directly
853 //
854
855 // called by read_inode
856 static void init_inode (struct inode * inode, struct path * path)
857 {
858 struct buffer_head * bh;
859 struct item_head * ih;
860 __u32 rdev;
861 //int version = ITEM_VERSION_1;
862
863 bh = PATH_PLAST_BUFFER (path);
864 ih = PATH_PITEM_HEAD (path);
865
866
867 copy_key (INODE_PKEY (inode), &(ih->ih_key));
868 inode->i_blksize = PAGE_SIZE;
869
870 INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
871
872 if (stat_data_v1 (ih)) {
873 struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
874 unsigned long blocks;
875
876 inode_items_version (inode) = ITEM_VERSION_1;
877 inode->i_mode = le16_to_cpu (sd->sd_mode);
878 inode->i_nlink = le16_to_cpu (sd->sd_nlink);
879 inode->i_uid = le16_to_cpu (sd->sd_uid);
880 inode->i_gid = le16_to_cpu (sd->sd_gid);
881 inode->i_size = le32_to_cpu (sd->sd_size);
882 inode->i_atime = le32_to_cpu (sd->sd_atime);
883 inode->i_mtime = le32_to_cpu (sd->sd_mtime);
884 inode->i_ctime = le32_to_cpu (sd->sd_ctime);
885
886 inode->i_blocks = le32_to_cpu (sd->u.sd_blocks);
887 inode->i_generation = INODE_PKEY (inode)->k_dir_id;
888 blocks = (inode->i_size + 511) >> 9;
889 blocks = _ROUND_UP (blocks, inode->i_blksize >> 9);
890 if (inode->i_blocks > blocks) {
891 // there was a bug in <=3.5.23 when i_blocks could take negative
892 // values. Starting from 3.5.17 this value could even be stored in
893 // stat data. For such files we set i_blocks based on file
894 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
895 // only updated if file's inode will ever change
896 inode->i_blocks = blocks;
897 }
898
899 rdev = le32_to_cpu (sd->u.sd_rdev);
900 inode->u.reiserfs_i.i_first_direct_byte = le32_to_cpu (sd->sd_first_direct_byte);
901 } else {
902 // new stat data found, but object may have old items
903 // (directories and symlinks)
904 struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
905
906 /* both old and new directories have old keys */
907 //version = (S_ISDIR (sd->sd_mode) ? ITEM_VERSION_1 : ITEM_VERSION_2);
908 if (S_ISDIR (sd->sd_mode) || S_ISLNK (sd->sd_mode))
909 inode_items_version (inode) = ITEM_VERSION_1;
910 else
911 inode_items_version (inode) = ITEM_VERSION_2;
912 inode->i_mode = le16_to_cpu (sd->sd_mode);
913 inode->i_nlink = le32_to_cpu (sd->sd_nlink);
914 inode->i_uid = le32_to_cpu (sd->sd_uid);
915 inode->i_size = le64_to_cpu (sd->sd_size);
916 inode->i_gid = le32_to_cpu (sd->sd_gid);
917 inode->i_mtime = le32_to_cpu (sd->sd_mtime);
918 inode->i_atime = le32_to_cpu (sd->sd_atime);
919 inode->i_ctime = le32_to_cpu (sd->sd_ctime);
920 inode->i_blocks = le32_to_cpu (sd->sd_blocks);
921 rdev = le32_to_cpu (sd->u.sd_rdev);
922 if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
923 inode->i_generation = INODE_PKEY (inode)->k_dir_id;
924 else
925 inode->i_generation = le32_to_cpu( sd->u.sd_generation );
926 }
927
928 /* nopack = 0, by default */
929 inode->u.reiserfs_i.nopack = 0;
930
931 pathrelse (path);
932 if (S_ISREG (inode->i_mode)) {
933 inode->i_op = &reiserfs_file_inode_operations;
934 inode->i_fop = &reiserfs_file_operations;
935 inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
936 } else if (S_ISDIR (inode->i_mode)) {
937 inode->i_op = &reiserfs_dir_inode_operations;
938 inode->i_fop = &reiserfs_dir_operations;
939 } else if (S_ISLNK (inode->i_mode)) {
940 inode->i_op = &page_symlink_inode_operations;
941 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
942 } else {
943 inode->i_blocks = 0;
944 init_special_inode(inode, inode->i_mode, rdev) ;
945 }
946 }
947
948
949 // update new stat data with inode fields
950 static void inode2sd (void * sd, struct inode * inode)
951 {
952 struct stat_data * sd_v2 = (struct stat_data *)sd;
953
954 sd_v2->sd_mode = cpu_to_le16 (inode->i_mode);
955 sd_v2->sd_nlink = cpu_to_le16 (inode->i_nlink);
956 sd_v2->sd_uid = cpu_to_le32 (inode->i_uid);
957 sd_v2->sd_size = cpu_to_le64 (inode->i_size);
958 sd_v2->sd_gid = cpu_to_le32 (inode->i_gid);
959 sd_v2->sd_mtime = cpu_to_le32 (inode->i_mtime);
960 sd_v2->sd_atime = cpu_to_le32 (inode->i_atime);
961 sd_v2->sd_ctime = cpu_to_le32 (inode->i_ctime);
962 sd_v2->sd_blocks = cpu_to_le32 (inode->i_blocks);
963 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
964 sd_v2->u.sd_rdev = cpu_to_le32 (inode->i_rdev);
965 } else {
966 sd_v2->u.sd_generation = cpu_to_le32( inode -> i_generation );
967 }
968 }
969
970
971 // used to copy inode's fields to old stat data
972 static void inode2sd_v1 (void * sd, struct inode * inode)
973 {
974 struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
975
976 sd_v1->sd_mode = cpu_to_le16 (inode->i_mode);
977 sd_v1->sd_uid = cpu_to_le16 (inode->i_uid);
978 sd_v1->sd_gid = cpu_to_le16 (inode->i_gid);
979 sd_v1->sd_nlink = cpu_to_le16 (inode->i_nlink);
980 sd_v1->sd_size = cpu_to_le32 (inode->i_size);
981 sd_v1->sd_atime = cpu_to_le32 (inode->i_atime);
982 sd_v1->sd_ctime = cpu_to_le32 (inode->i_ctime);
983 sd_v1->sd_mtime = cpu_to_le32 (inode->i_mtime);
984 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
985 sd_v1->u.sd_rdev = cpu_to_le32 (inode->i_rdev);
986 else
987 sd_v1->u.sd_blocks = cpu_to_le32 (inode->i_blocks);
988
989 // Sigh. i_first_direct_byte is back
990 sd_v1->sd_first_direct_byte = cpu_to_le32 (inode->u.reiserfs_i.i_first_direct_byte);
991 }
992
993
994 /* NOTE, you must prepare the buffer head before sending it here,
995 ** and then log it after the call
996 */
997 static void update_stat_data (struct path * path, struct inode * inode)
998 {
999 struct buffer_head * bh;
1000 struct item_head * ih;
1001
1002 bh = PATH_PLAST_BUFFER (path);
1003 ih = PATH_PITEM_HEAD (path);
1004
1005 if (!is_statdata_le_ih (ih))
1006 reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
1007 INODE_PKEY (inode), ih);
1008
1009 if (stat_data_v1 (ih)) {
1010 // path points to old stat data
1011 inode2sd_v1 (B_I_PITEM (bh, ih), inode);
1012 } else {
1013 inode2sd (B_I_PITEM (bh, ih), inode);
1014 }
1015
1016 return;
1017 }
1018
1019
1020 void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
1021 struct inode * inode)
1022 {
1023 struct cpu_key key;
1024 INITIALIZE_PATH(path);
1025 struct buffer_head *bh ;
1026 int fs_gen ;
1027 struct item_head *ih, tmp_ih ;
1028 int retval;
1029
1030 make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant
1031
1032 for(;;) {
1033 int pos;
1034 /* look for the object's stat data */
1035 retval = search_item (inode->i_sb, &key, &path);
1036 if (retval == IO_ERROR) {
1037 reiserfs_warning ("vs-13050: reiserfs_update_sd: "
1038 "i/o failure occurred trying to update %K stat data",
1039 &key);
1040 return;
1041 }
1042 if (retval == ITEM_NOT_FOUND) {
1043 pos = PATH_LAST_POSITION (&path);
1044 pathrelse(&path) ;
1045 if (inode->i_nlink == 0) {
1046 /*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
1047 return;
1048 }
1049 reiserfs_warning ("vs-13060: reiserfs_update_sd: "
1050 "stat data of object %k (nlink == %d) not found (pos %d)\n",
1051 INODE_PKEY (inode), inode->i_nlink, pos);
1052 reiserfs_check_path(&path) ;
1053 return;
1054 }
1055
1056 /* sigh, prepare_for_journal might schedule. When it schedules the
1057 ** FS might change. We have to detect that, and loop back to the
1058 ** search if the stat data item has moved
1059 */
1060 bh = get_last_bh(&path) ;
1061 ih = get_ih(&path) ;
1062 copy_item_head (&tmp_ih, ih);
1063 fs_gen = get_generation (inode->i_sb);
1064 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1065 if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
1066 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1067 continue ; /* Stat_data item has been moved after scheduling. */
1068 }
1069 break;
1070 }
1071 update_stat_data (&path, inode);
1072 journal_mark_dirty(th, th->t_super, bh) ;
1073 pathrelse (&path);
1074 return;
1075 }
1076
/*
** reiserfs_read_inode: argument-less read_inode entry point.  It has no
** key information to search with, so it only marks the inode bad;
** reiserfs_read_inode2 is the variant that actually loads stat data.
*/
void reiserfs_read_inode (struct inode *inode)
{
    make_bad_inode (inode);
}
1080
1081
1082 //
1083 // initially this function was derived from minix or ext2's analog and
1084 // evolved as the prototype did
1085 //
1086
1087 /* looks for stat data in the tree, and fills up the fields of in-core
1088 inode stat data fields */
1089 void reiserfs_read_inode2 (struct inode * inode, void *p)
1090 {
1091 INITIALIZE_PATH (path_to_sd);
1092 struct cpu_key key;
1093 struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ;
1094 unsigned long dirino;
1095 int retval;
1096
1097 if (!p) {
1098 make_bad_inode(inode) ;
1099 return;
1100 }
1101
1102 dirino = args->objectid ;
1103
1104 /* set version 1, version 2 could be used too, because stat data
1105 key is the same in both versions */
1106 key.version = ITEM_VERSION_1;
1107 key.on_disk_key.k_dir_id = dirino;
1108 key.on_disk_key.k_objectid = inode->i_ino;
1109 key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
1110 key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;
1111
1112 /* look for the object's stat data */
1113 retval = search_item (inode->i_sb, &key, &path_to_sd);
1114 if (retval == IO_ERROR) {
1115 reiserfs_warning ("vs-13070: reiserfs_read_inode2: "
1116 "i/o failure occurred trying to find stat data of %K\n",
1117 &key);
1118 make_bad_inode(inode) ;
1119 return;
1120 }
1121 if (retval != ITEM_FOUND) {
1122 /* a stale NFS handle can trigger this without it being an error */
1123 pathrelse (&path_to_sd);
1124 make_bad_inode(inode) ;
1125 return;
1126 }
1127
1128 init_inode (inode, &path_to_sd);
1129 reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
1130
1131 }
1132
1133
1134 struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key)
1135 {
1136 struct inode * inode;
1137 struct reiserfs_iget4_args args ;
1138
1139 args.objectid = key->on_disk_key.k_dir_id ;
1140 inode = iget4 (s, key->on_disk_key.k_objectid, 0, (void *)(&args));
1141 if (!inode)
1142 return ERR_PTR(-ENOMEM) ;
1143
1144 if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
1145 /* either due to i/o error or a stale NFS handle */
1146 iput (inode);
1147 inode = 0;
1148 }
1149 return inode;
1150 }
1151
1152 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
1153 int len, int fhtype, int parent) {
1154 struct cpu_key key ;
1155 struct inode *inode = NULL ;
1156 struct list_head *lp;
1157 struct dentry *result;
1158
1159 /* fhtype happens to reflect the number of u32s encoded.
1160 * due to a bug in earlier code, fhtype might indicate there
1161 * are more u32s then actually fitted.
1162 * so if fhtype seems to be more than len, reduce fhtype.
1163 * Valid types are:
1164 * 2 - objectid + dir_id - legacy support
1165 * 3 - objectid + dir_id + generation
1166 * 4 - objectid + dir_id + objectid and dirid of parent - legacy
1167 * 5 - objectid + dir_id + generation + objectid and dirid of parent
1168 * 6 - as above plus generation of directory
1169 * 6 does not fit in NFSv2 handles
1170 */
1171 if (fhtype > len) {
1172 if (fhtype != 6 || len != 5)
1173 printk(KERN_WARNING "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",
1174 fhtype, len);
1175 fhtype = 5;
1176 }
1177 if (fhtype < 2 || (parent && fhtype < 4))
1178 goto out ;
1179
1180 if (! parent) {
1181 /* this works for handles from old kernels because the default
1182 ** reiserfs generation number is the packing locality.
1183 */
1184 key.on_disk_key.k_objectid = data[0] ;
1185 key.on_disk_key.k_dir_id = data[1] ;
1186 inode = reiserfs_iget(sb, &key) ;
1187 if (inode && !IS_ERR(inode) && (fhtype == 3 || fhtype >= 5) &&
1188 data[2] != inode->i_generation) {
1189 iput(inode) ;
1190 inode = NULL ;
1191 }
1192 } else {
1193 key.on_disk_key.k_objectid = data[fhtype>=5?3:2] ;
1194 key.on_disk_key.k_dir_id = data[fhtype>=5?4:3] ;
1195 inode = reiserfs_iget(sb, &key) ;
1196 if (inode && !IS_ERR(inode) && fhtype == 6 &&
1197 data[5] != inode->i_generation) {
1198 iput(inode) ;
1199 inode = NULL ;
1200 }
1201 }
1202 out:
1203 if (IS_ERR(inode))
1204 return ERR_PTR(PTR_ERR(inode));
1205 if (!inode)
1206 return ERR_PTR(-ESTALE) ;
1207
1208 /* now to find a dentry.
1209 * If possible, get a well-connected one
1210 */
1211 spin_lock(&dcache_lock);
1212 for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
1213 result = list_entry(lp,struct dentry, d_alias);
1214 if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
1215 dget_locked(result);
1216 result->d_vfs_flags |= DCACHE_REFERENCED;
1217 spin_unlock(&dcache_lock);
1218 iput(inode);
1219 return result;
1220 }
1221 }
1222 spin_unlock(&dcache_lock);
1223 result = d_alloc_root(inode);
1224 if (result == NULL) {
1225 iput(inode);
1226 return ERR_PTR(-ENOMEM);
1227 }
1228 result->d_flags |= DCACHE_NFSD_DISCONNECTED;
1229 return result;
1230
1231 }
1232
1233 int reiserfs_dentry_to_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
1234 struct inode *inode = dentry->d_inode ;
1235 int maxlen = *lenp;
1236
1237 if (maxlen < 3)
1238 return 255 ;
1239
1240 data[0] = inode->i_ino ;
1241 data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1242 data[2] = inode->i_generation ;
1243 *lenp = 3 ;
1244 /* no room for directory info? return what we've stored so far */
1245 if (maxlen < 5 || ! need_parent)
1246 return 3 ;
1247
1248 inode = dentry->d_parent->d_inode ;
1249 data[3] = inode->i_ino ;
1250 data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1251 *lenp = 5 ;
1252 if (maxlen < 6)
1253 return 5 ;
1254 data[5] = inode->i_generation ;
1255 *lenp = 6 ;
1256 return 6 ;
1257 }
1258
1259
1260 //
1261 // initially this function was derived from minix or ext2's analog and
1262 // evolved as the prototype did
1263 //
1264 /* looks for stat data, then copies fields to it, marks the buffer
1265 containing stat data as dirty */
1266 /* reiserfs inodes are never really dirty, since the dirty inode call
1267 ** always logs them. This call allows the VFS inode marking routines
1268 ** to properly mark inodes for datasync and such, but only actually
1269 ** does something when called for a synchronous update.
1270 */
1271 void reiserfs_write_inode (struct inode * inode, int do_sync) {
1272 struct reiserfs_transaction_handle th ;
1273 int jbegin_count = 1 ;
1274
1275 if (inode->i_sb->s_flags & MS_RDONLY) {
1276 reiserfs_warning("clm-6005: writing inode %lu on readonly FS\n",
1277 inode->i_ino) ;
1278 return ;
1279 }
1280 /* memory pressure can sometimes initiate write_inode calls with sync == 1,
1281 ** these cases are just when the system needs ram, not when the
1282 ** inode needs to reach disk for safety, and they can safely be
1283 ** ignored because the altered inode has already been logged.
1284 */
1285 if (do_sync && !(current->flags & PF_MEMALLOC)) {
1286 lock_kernel() ;
1287 journal_begin(&th, inode->i_sb, jbegin_count) ;
1288 reiserfs_update_sd (&th, inode);
1289 journal_end_sync(&th, inode->i_sb, jbegin_count) ;
1290 unlock_kernel() ;
1291 }
1292 }
1293
1294 void reiserfs_dirty_inode (struct inode * inode) {
1295 struct reiserfs_transaction_handle th ;
1296
1297 if (inode->i_sb->s_flags & MS_RDONLY) {
1298 reiserfs_warning("clm-6006: writing inode %lu on readonly FS\n",
1299 inode->i_ino) ;
1300 return ;
1301 }
1302 lock_kernel() ;
1303 journal_begin(&th, inode->i_sb, 1) ;
1304 reiserfs_update_sd (&th, inode);
1305 journal_end(&th, inode->i_sb, 1) ;
1306 unlock_kernel() ;
1307 }
1308
1309
/*
** reiserfs_sync_inode: update the stat data of inode inside the running
** transaction th.  Always returns 0.
**
** FIXME: no need any more. right?
*/
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
{
    reiserfs_update_sd (th, inode);
    return 0;
}
1318
1319
1320 /* stat data of new object is inserted already, this inserts the item
1321 containing "." and ".." entries */
1322 static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
1323 struct item_head * ih, struct path * path, const struct inode * dir)
1324 {
1325 struct super_block * sb = th->t_super;
1326 char empty_dir [EMPTY_DIR_SIZE];
1327 char * body = empty_dir;
1328 struct cpu_key key;
1329 int retval;
1330
1331 _make_cpu_key (&key, ITEM_VERSION_1, le32_to_cpu (ih->ih_key.k_dir_id),
1332 le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);
1333
1334 /* compose item head for new item. Directories consist of items of
1335 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1336 is done by reiserfs_new_inode */
1337 if (old_format_only (sb)) {
1338 make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1339
1340 make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1341 le32_to_cpu (INODE_PKEY (dir)->k_dir_id),
1342 le32_to_cpu (INODE_PKEY (dir)->k_objectid));
1343 } else {
1344 make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1345
1346 make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1347 le32_to_cpu (INODE_PKEY (dir)->k_dir_id),
1348 le32_to_cpu (INODE_PKEY (dir)->k_objectid));
1349 }
1350
1351 /* look for place in the tree for new item */
1352 retval = search_item (sb, &key, path);
1353 if (retval == IO_ERROR) {
1354 reiserfs_warning ("vs-13080: reiserfs_new_directory: "
1355 "i/o failure occurred creating new directory\n");
1356 return -EIO;
1357 }
1358 if (retval == ITEM_FOUND) {
1359 pathrelse (path);
1360 reiserfs_warning ("vs-13070: reiserfs_new_directory: "
1361 "object with this key exists (%k)", &(ih->ih_key));
1362 return -EEXIST;
1363 }
1364
1365 /* insert item, that is empty directory item */
1366 return reiserfs_insert_item (th, path, &key, ih, body);
1367 }
1368
1369
1370 /* stat data of object has been inserted, this inserts the item
1371 containing the body of symlink */
1372 static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1373 struct item_head * ih,
1374 struct path * path, const char * symname, int item_len)
1375 {
1376 struct super_block * sb = th->t_super;
1377 struct cpu_key key;
1378 int retval;
1379
1380 _make_cpu_key (&key, ITEM_VERSION_1,
1381 le32_to_cpu (ih->ih_key.k_dir_id),
1382 le32_to_cpu (ih->ih_key.k_objectid),
1383 1, TYPE_DIRECT, 3/*key length*/);
1384
1385 make_le_item_head (ih, 0, ITEM_VERSION_1, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
1386
1387 /* look for place in the tree for new item */
1388 retval = search_item (sb, &key, path);
1389 if (retval == IO_ERROR) {
1390 reiserfs_warning ("vs-13080: reiserfs_new_symlinik: "
1391 "i/o failure occurred creating new symlink\n");
1392 return -EIO;
1393 }
1394 if (retval == ITEM_FOUND) {
1395 pathrelse (path);
1396 reiserfs_warning ("vs-13080: reiserfs_new_symlink: "
1397 "object with this key exists (%k)", &(ih->ih_key));
1398 return -EEXIST;
1399 }
1400
1401 /* insert item, that is body of symlink */
1402 return reiserfs_insert_item (th, path, &key, ih, symname);
1403 }
1404
1405
1406 /* inserts the stat data into the tree, and then calls
1407 reiserfs_new_directory (to insert ".", ".." item if new object is
1408 directory) or reiserfs_new_symlink (to insert symlink body if new
1409 object is symlink) or nothing (if new object is regular file) */
1410 struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
1411 const struct inode * dir, int mode,
1412 const char * symname,
1413 int i_size, /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1414 strlen (symname) for symlinks)*/
1415 struct dentry *dentry, struct inode *inode, int * err)
1416 {
1417 struct super_block * sb;
1418 INITIALIZE_PATH (path_to_key);
1419 struct cpu_key key;
1420 struct item_head ih;
1421 struct stat_data sd;
1422 int retval;
1423
1424 if (!dir || !dir->i_nlink) {
1425 *err = -EPERM;
1426 iput(inode) ;
1427 return NULL;
1428 }
1429
1430 sb = dir->i_sb;
1431 inode->i_sb = sb;
1432 inode->i_flags = 0;//inode->i_sb->s_flags;
1433
1434 /* item head of new item */
1435 ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
1436 ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
1437 if (!ih.ih_key.k_objectid) {
1438 iput(inode) ;
1439 *err = -ENOMEM;
1440 return NULL;
1441 }
1442 if (old_format_only (sb))
1443 /* not a perfect generation count, as object ids can be reused, but this
1444 ** is as good as reiserfs can do right now.
1445 ** note that the private part of inode isn't filled in yet, we have
1446 ** to use the directory.
1447 */
1448 inode->i_generation = INODE_PKEY (dir)->k_objectid;
1449 else
1450 #if defined( USE_INODE_GENERATION_COUNTER )
1451 inode->i_generation =
1452 le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
1453 #else
1454 inode->i_generation = ++event;
1455 #endif
1456 if (old_format_only (sb))
1457 make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1458 else
1459 make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1460
1461
1462 /* key to search for correct place for new stat data */
1463 _make_cpu_key (&key, ITEM_VERSION_2, le32_to_cpu (ih.ih_key.k_dir_id),
1464 le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
1465
1466 /* find proper place for inserting of stat data */
1467 retval = search_item (sb, &key, &path_to_key);
1468 if (retval == IO_ERROR) {
1469 iput (inode);
1470 *err = -EIO;
1471 return NULL;
1472 }
1473 if (retval == ITEM_FOUND) {
1474 pathrelse (&path_to_key);
1475 iput (inode);
1476 *err = -EEXIST;
1477 return NULL;
1478 }
1479
1480 /* fill stat data */
1481 inode->i_mode = mode;
1482 inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
1483 inode->i_uid = current->fsuid;
1484 if (dir->i_mode & S_ISGID) {
1485 inode->i_gid = dir->i_gid;
1486 if (S_ISDIR(mode))
1487 inode->i_mode |= S_ISGID;
1488 } else
1489 inode->i_gid = current->fsgid;
1490
1491 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1492 inode->i_size = i_size;
1493 inode->i_blocks = (inode->i_size + 511) >> 9;
1494 inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 :
1495 U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;
1496
1497 INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
1498
1499 if (old_format_only (sb))
1500 inode2sd_v1 (&sd, inode);
1501 else
1502 inode2sd (&sd, inode);
1503
1504 // these do not go to on-disk stat data
1505 inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
1506 inode->i_blksize = PAGE_SIZE;
1507 inode->i_dev = sb->s_dev;
1508
1509 // store in in-core inode the key of stat data and version all
1510 // object items will have (directory items will have old offset
1511 // format, other new objects will consist of new items)
1512 memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
1513 if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
1514 inode_items_version (inode) = ITEM_VERSION_1;
1515 else
1516 inode_items_version (inode) = ITEM_VERSION_2;
1517
1518 /* insert the stat data into the tree */
1519 retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
1520 if (retval) {
1521 iput (inode);
1522 *err = retval;
1523 reiserfs_check_path(&path_to_key) ;
1524 return NULL;
1525 }
1526
1527 if (S_ISDIR(mode)) {
1528 /* insert item with "." and ".." */
1529 retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
1530 }
1531
1532 if (S_ISLNK(mode)) {
1533 /* insert body of symlink */
1534 if (!old_format_only (sb))
1535 i_size = ROUND_UP(i_size);
1536 retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
1537 }
1538 if (retval) {
1539 inode->i_nlink = 0;
1540 iput (inode);
1541 *err = retval;
1542 reiserfs_check_path(&path_to_key) ;
1543 return NULL;
1544 }
1545
1546 insert_inode_hash (inode);
1547 // we do not mark inode dirty: on disk content matches to the
1548 // in-core one
1549 reiserfs_check_path(&path_to_key) ;
1550
1551 return inode;
1552 }
1553
1554 /*
1555 ** finds the tail page in the page cache,
1556 ** reads the last block in.
1557 **
1558 ** On success, page_result is set to a locked, pinned page, and bh_result
1559 ** is set to an up to date buffer for the last block in the file. returns 0.
1560 **
1561 ** tail conversion is not done, so bh_result might not be valid for writing
1562 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1563 ** trying to write the block.
1564 **
1565 ** on failure, nonzero is returned, page_result and bh_result are untouched.
1566 */
1567 static int grab_tail_page(struct inode *p_s_inode,
1568 struct page **page_result,
1569 struct buffer_head **bh_result) {
1570
1571 /* we want the page with the last byte in the file,
1572 ** not the page that will hold the next byte for appending
1573 */
1574 unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
1575 unsigned long pos = 0 ;
1576 unsigned long start = 0 ;
1577 unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
1578 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
1579 struct buffer_head *bh ;
1580 struct buffer_head *head ;
1581 struct page * page ;
1582 int error ;
1583
1584 /* we know that we are only called with inode->i_size > 0.
1585 ** we also know that a file tail can never be as big as a block
1586 ** If i_size % blocksize == 0, our file is currently block aligned
1587 ** and it won't need converting or zeroing after a truncate.
1588 */
1589 if ((offset & (blocksize - 1)) == 0) {
1590 return -ENOENT ;
1591 }
1592 page = grab_cache_page(p_s_inode->i_mapping, index) ;
1593 error = -ENOMEM ;
1594 if (!page) {
1595 goto out ;
1596 }
1597 /* start within the page of the last block in the file */
1598 start = (offset / blocksize) * blocksize ;
1599
1600 error = block_prepare_write(page, start, offset,
1601 reiserfs_get_block_create_0) ;
1602 if (error)
1603 goto unlock ;
1604
1605 kunmap(page) ; /* mapped by block_prepare_write */
1606
1607 head = page->buffers ;
1608 bh = head;
1609 do {
1610 if (pos >= start) {
1611 break ;
1612 }
1613 bh = bh->b_this_page ;
1614 pos += blocksize ;
1615 } while(bh != head) ;
1616
1617 if (!buffer_uptodate(bh)) {
1618 /* note, this should never happen, prepare_write should
1619 ** be taking care of this for us. If the buffer isn't up to date,
1620 ** I've screwed up the code to find the buffer, or the code to
1621 ** call prepare_write
1622 */
1623 reiserfs_warning("clm-6000: error reading block %lu on dev %s\n",
1624 bh->b_blocknr, kdevname(bh->b_dev)) ;
1625 error = -EIO ;
1626 goto unlock ;
1627 }
1628 *bh_result = bh ;
1629 *page_result = page ;
1630
1631 out:
1632 return error ;
1633
1634 unlock:
1635 UnlockPage(page) ;
1636 page_cache_release(page) ;
1637 return error ;
1638 }
1639
1640 /*
1641 ** vfs version of truncate file. Must NOT be called with
1642 ** a transaction already started.
1643 **
1644 ** some code taken from block_truncate_page
1645 */
1646 void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
1647 struct reiserfs_transaction_handle th ;
1648 int windex ;
1649
1650 /* we want the offset for the first byte after the end of the file */
1651 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1652 unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
1653 unsigned length ;
1654 struct page *page = NULL ;
1655 int error ;
1656 struct buffer_head *bh = NULL ;
1657
1658 if (p_s_inode->i_size > 0) {
1659 if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1660 // -ENOENT means we truncated past the end of the file,
1661 // and get_block_create_0 could not find a block to read in,
1662 // which is ok.
1663 if (error != -ENOENT)
1664 reiserfs_warning("clm-6001: grab_tail_page failed %d\n", error);
1665 page = NULL ;
1666 bh = NULL ;
1667 }
1668 }
1669
1670 /* so, if page != NULL, we have a buffer head for the offset at
1671 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
1672 ** then we have an unformatted node. Otherwise, we have a direct item,
1673 ** and no zeroing is required on disk. We zero after the truncate,
1674 ** because the truncate might pack the item anyway
1675 ** (it will unmap bh if it packs).
1676 */
1677 journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ;
1678 windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
1679 reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
1680 pop_journal_writer(windex) ;
1681 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ;
1682
1683 if (page) {
1684 length = offset & (blocksize - 1) ;
1685 /* if we are not on a block boundary */
1686 if (length) {
1687 length = blocksize - length ;
1688 memset((char *)kmap(page) + offset, 0, length) ;
1689 flush_dcache_page(page) ;
1690 kunmap(page) ;
1691 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1692 mark_buffer_dirty(bh) ;
1693 }
1694 }
1695 UnlockPage(page) ;
1696 page_cache_release(page) ;
1697 }
1698
1699 return ;
1700 }
1701
1702 static int map_block_for_writepage(struct inode *inode,
1703 struct buffer_head *bh_result,
1704 unsigned long block) {
1705 struct reiserfs_transaction_handle th ;
1706 int fs_gen ;
1707 struct item_head tmp_ih ;
1708 struct item_head *ih ;
1709 struct buffer_head *bh ;
1710 __u32 *item ;
1711 struct cpu_key key ;
1712 INITIALIZE_PATH(path) ;
1713 int pos_in_item ;
1714 int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
1715 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
1716 int retval ;
1717 int use_get_block = 0 ;
1718 int bytes_copied = 0 ;
1719 int copy_size ;
1720
1721 start_over:
1722 lock_kernel() ;
1723 journal_begin(&th, inode->i_sb, jbegin_count) ;
1724
1725 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
1726
1727 research:
1728 retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
1729 if (retval != POSITION_FOUND) {
1730 use_get_block = 1;
1731 goto out ;
1732 }
1733
1734 bh = get_last_bh(&path) ;
1735 ih = get_ih(&path) ;
1736 item = get_item(&path) ;
1737 pos_in_item = path.pos_in_item ;
1738
1739 /* we've found an unformatted node */
1740 if (indirect_item_found(retval, ih)) {
1741 if (bytes_copied > 0) {
1742 reiserfs_warning("clm-6002: bytes_copied %d\n", bytes_copied) ;
1743 }
1744 if (!item[pos_in_item]) {
1745 /* crap, we are writing to a hole */
1746 use_get_block = 1;
1747 goto out ;
1748 }
1749 set_block_dev_mapped(bh_result, le32_to_cpu(item[pos_in_item]), inode);
1750 mark_buffer_uptodate(bh_result, 1);
1751 } else if (is_direct_le_ih(ih)) {
1752 char *p ;
1753 p = page_address(bh_result->b_page) ;
1754 p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
1755 copy_size = le16_to_cpu(ih->ih_item_len) - pos_in_item ;
1756
1757 fs_gen = get_generation(inode->i_sb) ;
1758 copy_item_head(&tmp_ih, ih) ;
1759 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1760 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
1761 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1762 goto research;
1763 }
1764
1765 memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
1766
1767 journal_mark_dirty(&th, inode->i_sb, bh) ;
1768 bytes_copied += copy_size ;
1769 set_block_dev_mapped(bh_result, 0, inode);
1770 mark_buffer_uptodate(bh_result, 1);
1771
1772 /* are there still bytes left? */
1773 if (bytes_copied < bh_result->b_size &&
1774 (byte_offset + bytes_copied) < inode->i_size) {
1775 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
1776 goto research ;
1777 }
1778 } else {
1779 reiserfs_warning("clm-6003: bad item inode %lu, device %s\n", inode->i_ino, kdevname(inode->i_sb->s_dev)) ;
1780 retval = -EIO ;
1781 goto out ;
1782 }
1783 retval = 0 ;
1784
1785 out:
1786 pathrelse(&path) ;
1787 journal_end(&th, inode->i_sb, jbegin_count) ;
1788 unlock_kernel() ;
1789
1790 /* this is where we fill in holes in the file. */
1791 if (use_get_block) {
1792 kmap(bh_result->b_page) ;
1793 retval = reiserfs_get_block(inode, block, bh_result,
1794 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
1795 kunmap(bh_result->b_page) ;
1796 if (!retval) {
1797 if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
1798 /* get_block failed to find a mapped unformatted node. */
1799 use_get_block = 0 ;
1800 goto start_over ;
1801 }
1802 }
1803 }
1804 return retval ;
1805 }
1806
1807 /* helper func to get a buffer head ready for writepage to send to
1808 ** ll_rw_block
1809 */
1810 static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1811 struct buffer_head *bh ;
1812 int i;
1813 for(i = 0 ; i < nr ; i++) {
1814 bh = bhp[i] ;
1815 lock_buffer(bh) ;
1816 set_buffer_async_io(bh) ;
1817 /* submit_bh doesn't care if the buffer is dirty, but nobody
1818 ** later on in the call chain will be cleaning it. So, we
1819 ** clean the buffer here, it still gets written either way.
1820 */
1821 clear_bit(BH_Dirty, &bh->b_state) ;
1822 set_bit(BH_Uptodate, &bh->b_state) ;
1823 submit_bh(WRITE, bh) ;
1824 }
1825 }
1826
1827 static int reiserfs_write_full_page(struct page *page) {
1828 struct inode *inode = page->mapping->host ;
1829 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
1830 unsigned last_offset = PAGE_CACHE_SIZE;
1831 int error = 0;
1832 unsigned long block ;
1833 unsigned cur_offset = 0 ;
1834 struct buffer_head *head, *bh ;
1835 int partial = 0 ;
1836 struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
1837 int nr = 0 ;
1838
1839 if (!page->buffers) {
1840 block_prepare_write(page, 0, 0, NULL) ;
1841 kunmap(page) ;
1842 }
1843 /* last page in the file, zero out any contents past the
1844 ** last byte in the file
1845 */
1846 if (page->index >= end_index) {
1847 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1848 /* no file contents in this page */
1849 if (page->index >= end_index + 1 || !last_offset) {
1850 error = -EIO ;
1851 goto fail ;
1852 }
1853 memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
1854 flush_dcache_page(page) ;
1855 kunmap(page) ;
1856 }
1857 head = page->buffers ;
1858 bh = head ;
1859 block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
1860 do {
1861 /* if this offset in the page is outside the file */
1862 if (cur_offset >= last_offset) {
1863 if (!buffer_uptodate(bh))
1864 partial = 1 ;
1865 } else {
1866 /* fast path, buffer mapped to an unformatted node */
1867 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1868 arr[nr++] = bh ;
1869 } else {
1870 /* buffer not mapped yet, or points to a direct item.
1871 ** search and dirty or log
1872 */
1873 if ((error = map_block_for_writepage(inode, bh, block))) {
1874 goto fail ;
1875 }
1876 /* map_block_for_writepage either found an unformatted node
1877 ** and mapped it for us, or it found a direct item
1878 ** and logged the changes.
1879 */
1880 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1881 arr[nr++] = bh ;
1882 }
1883 }
1884 }
1885 bh = bh->b_this_page ;
1886 cur_offset += bh->b_size ;
1887 block++ ;
1888 } while(bh != head) ;
1889
1890 /* if this page only had a direct item, it is very possible for
1891 ** nr == 0 without there being any kind of error.
1892 */
1893 if (nr) {
1894 submit_bh_for_writepage(arr, nr) ;
1895 } else {
1896 UnlockPage(page) ;
1897 }
1898 if (!partial)
1899 SetPageUptodate(page) ;
1900
1901 return 0 ;
1902
1903 fail:
1904 if (nr) {
1905 submit_bh_for_writepage(arr, nr) ;
1906 } else {
1907 UnlockPage(page) ;
1908 }
1909 ClearPageUptodate(page) ;
1910 return error ;
1911 }
1912
1913 //
1914 // this is exactly what 2.3.99-pre9's ext2_readpage is
1915 //
1916 static int reiserfs_readpage (struct file *f, struct page * page)
1917 {
1918 return block_read_full_page (page, reiserfs_get_block);
1919 }
1920
1921
1922 //
1923 // modified from ext2_writepage is
1924 //
1925 static int reiserfs_writepage (struct page * page)
1926 {
1927 struct inode *inode = page->mapping->host ;
1928 reiserfs_wait_on_write_block(inode->i_sb) ;
1929 return reiserfs_write_full_page(page) ;
1930 }
1931
1932
1933 //
1934 // from ext2_prepare_write, but modified
1935 //
1936 int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to) {
1937 struct inode *inode = page->mapping->host ;
1938 reiserfs_wait_on_write_block(inode->i_sb) ;
1939 fix_tail_page_for_writing(page) ;
1940 return block_prepare_write(page, from, to, reiserfs_get_block) ;
1941 }
1942
1943
1944 //
1945 // this is exactly what 2.3.99-pre9's ext2_bmap is
1946 //
1947 static int reiserfs_aop_bmap(struct address_space *as, long block) {
1948 return generic_block_bmap(as, block, reiserfs_bmap) ;
1949 }
1950
1951 static int reiserfs_commit_write(struct file *f, struct page *page,
1952 unsigned from, unsigned to) {
1953 struct inode *inode = page->mapping->host;
1954 int ret ;
1955
1956 reiserfs_wait_on_write_block(inode->i_sb) ;
1957 ret = generic_commit_write(f, page, from, to) ;
1958
1959 /* we test for O_SYNC here so we can commit the transaction
1960 ** for any packed tails the file might have had
1961 */
1962 if (f->f_flags & O_SYNC) {
1963 struct reiserfs_transaction_handle th ;
1964 lock_kernel() ;
1965 journal_begin(&th, inode->i_sb, 1) ;
1966 reiserfs_prepare_for_journal(inode->i_sb,
1967 SB_BUFFER_WITH_SB(inode->i_sb), 1) ;
1968 journal_mark_dirty(&th, inode->i_sb, SB_BUFFER_WITH_SB(inode->i_sb)) ;
1969 journal_end_sync(&th, inode->i_sb, 1) ;
1970 unlock_kernel() ;
1971 }
1972 return ret ;
1973 }
1974
1975 struct address_space_operations reiserfs_address_space_operations = {
1976 writepage: reiserfs_writepage,
1977 readpage: reiserfs_readpage,
1978 sync_page: block_sync_page,
1979 prepare_write: reiserfs_prepare_write,
1980 commit_write: reiserfs_commit_write,
1981 bmap: reiserfs_aop_bmap
1982 } ;
1983