File: /usr/src/linux/fs/reiserfs/buffer2.c

1     /*
2      *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README  
3      */
4     
5     
6     /*
7      * Contains code from
8      *
9      *  linux/include/linux/lock.h and linux/fs/buffer.c /linux/fs/minix/fsync.c
10      *
11      *  Copyright (C) 1991, 1992  Linus Torvalds
12      */
13     
14     #include <linux/config.h>
15     #include <linux/sched.h>
16     #include <linux/locks.h>
17     #include <linux/reiserfs_fs.h>
18     #include <linux/smp_lock.h>
19     
20     /*
21      *  wait_buffer_until_released
22      *  reiserfs_bread
23      *  reiserfs_getblk
24      *  get_new_buffer
25      */
26     
27     
28     
29     /* when we allocate a new block (get_new_buffer, get_empty_nodes) and
30        get buffer for it, it is possible that it is held by someone else
31        or even by this process. In this function we wait until all other
32        holders release buffer. To make sure, that current process does not
33        hold we did free all buffers in tree balance structure
34        (get_empty_nodes and get_nodes_for_preserving) or in path structure
35        only (get_new_buffer) just before calling this */
36     void wait_buffer_until_released (struct buffer_head * bh)
37     {
38       int repeat_counter = 0;
39     
40       while (atomic_read (&(bh->b_count)) > 1) {
41     
42         if ( !(++repeat_counter % 30000000) ) {
43           reiserfs_warning ("vs-3050: wait_buffer_until_released: nobody releases buffer (%b). Still waiting (%d) %cJDIRTY %cJWAIT\n",
44     			bh, repeat_counter, buffer_journaled(bh) ? ' ' : '!',
45     			buffer_journal_dirty(bh) ? ' ' : '!');
46         }
47         run_task_queue(&tq_disk);
48         current->policy |= SCHED_YIELD;
49         /*current->counter = 0;*/
50         schedule();
51       }
52       if (repeat_counter > 30000000) {
53         reiserfs_warning("vs-3051: done waiting, ignore vs-3050 messages for (%b)\n", bh) ;
54       }
55     }
56     
57     /*
58      * reiserfs_bread() reads a specified block and returns the buffer that contains
59      * it. It returns NULL if the block was unreadable.
60      */
61     /* It first tries to find the block in cache, and if it cannot do so
62        then it creates a new buffer and schedules I/O to read the
63        block. */
64     /* The function is NOT SCHEDULE-SAFE! */
65     
66     struct buffer_head  * reiserfs_bread (kdev_t n_dev, int n_block, int n_size) 
67     {
68         return bread (n_dev, n_block, n_size);
69     }
70     
71     /* This function looks for a buffer which contains a given block.  If
72        the block is in cache it returns it, otherwise it returns a new
73        buffer which is not uptodate.  This is called by reiserfs_bread and
74        other functions. Note that get_new_buffer ought to be called this
75        and this ought to be called get_new_buffer, since this doesn't
76        actually get the block off of the disk. */
77     /* The function is NOT SCHEDULE-SAFE! */
78     
79     struct buffer_head  * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size)
80     {
81         return getblk (n_dev, n_block, n_size);
82     }
83     
84     #ifdef NEW_GET_NEW_BUFFER
85     
86     /* returns one buffer with a blocknr near blocknr. */
87     static int get_new_buffer_near_blocknr(
88                        struct super_block *  p_s_sb,
89                        int blocknr,
90                        struct buffer_head ** pp_s_new_bh,
91                        struct path         * p_s_path 
92                        ) {
93       unsigned      long n_new_blocknumber = 0;
94       int           n_ret_value,
95                     n_repeat = CARRY_ON;
96     
97     #ifdef CONFIG_REISERFS_CHECK
98       int repeat_counter = 0;
99       
100       if (!blocknr)
101         printk ("blocknr passed to get_new_buffer_near_blocknr was 0");
102     #endif
103     
104     
105       if ( (n_ret_value = reiserfs_new_blocknrs (p_s_sb, &n_new_blocknumber,
106                                                  blocknr, 1)) == NO_DISK_SPACE )
107         return NO_DISK_SPACE;
108       
109       *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize);
110       if ( buffer_uptodate(*pp_s_new_bh) ) {
111     
112     #ifdef CONFIG_REISERFS_CHECK
113         if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) {
114           reiserfs_panic(p_s_sb, "PAP-14080: get_new_buffer: invalid uptodate buffer %b for the new block", *pp_s_new_bh);
115         }
116     #endif
117     
118         /* Free path buffers to prevent deadlock. */
119         /* It is possible that this process has the buffer, which this function is getting, already in
120            its path, and is responsible for double incrementing the value of b_count.  If we recalculate
121            the path after schedule we can avoid risking an endless loop.  This problematic situation is
122            possible in a multiple processing environment.  Suppose process 1 has acquired a path P; then
123            process 2 balanced and remove block A from the tree.  Process 1 continues and runs
124            get_new_buffer, that returns buffer with block A. If node A was on the path P, then it will
125            have b_count == 2. If we now will simply wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get
126            into an endless loop, as nobody will release this buffer and the current process holds buffer
127            twice. That is why we do decrement_counters_in_path(p_s_path) before waiting until b_count
128            becomes 1. (it there were other processes holding node A, then eventually we will get a
129            moment, when all of them released a buffer). */
130         if ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1  ) {
131           decrement_counters_in_path(p_s_path);
132           n_ret_value |= SCHEDULE_OCCURRED;
133         }
134     
135         while ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1 ) {
136     
137     #ifdef REISERFS_INFO
138           printk("get_new_buffer() calls schedule to decrement b_count\n");
139     #endif
140     
141     #ifdef CONFIG_REISERFS_CHECK
142           if ( ! (++repeat_counter % 10000) )
143     	printk("get_new_buffer(%u): counter(%d) too big", current->pid, repeat_counter);
144     #endif
145     
146           current->counter = 0;
147           schedule();
148         }
149     
150     #ifdef CONFIG_REISERFS_CHECK
151         if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) {
152           print_buffer_head(*pp_s_new_bh,"get_new_buffer");
153           reiserfs_panic(p_s_sb, "PAP-14090: get_new_buffer: invalid uptodate buffer %b for the new block(case 2)", *pp_s_new_bh);
154         }
155     #endif
156     
157       }
158       else {
159         ;
160     
161     #ifdef CONFIG_REISERFS_CHECK
162         if (atomic_read (&((*pp_s_new_bh)->b_count)) != 1) {
163           reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not uptodate buffer %b for the new block has b_count more than one",
164     		     *pp_s_new_bh);
165         }
166     #endif
167     
168       }
169       return (n_ret_value | n_repeat);
170     }
171     
172     
173     /* returns the block number of the last unformatted node, assumes p_s_key_to_search.k_offset is a byte in the tail of
174        the file, Useful for when you want to append to a file, and convert a direct item into an unformatted node near the
175        last unformatted node of the file.  Putting the unformatted node near the direct item is potentially very bad to do.
176        If there is no unformatted node in the file, then we return the block number of the direct item.  */
177     /* The function is NOT SCHEDULE-SAFE! */
178     inline int get_last_unformatted_node_blocknr_of_file(  struct key * p_s_key_to_search, struct super_block * p_s_sb,
179                                                            struct buffer_head * p_s_bh
180                                                            struct path * p_unf_search_path, struct inode * p_s_inode)
181     
182     {
183       struct key unf_key_to_search;
184       struct item_head * p_s_ih;
185       int n_pos_in_item;
186       struct buffer_head * p_indirect_item_bh;
187     
188           copy_key(&unf_key_to_search,p_s_key_to_search);
189           unf_key_to_search.k_uniqueness = TYPE_INDIRECT;
190           unf_key_to_search.k_offset = p_s_inode->u.reiserfs_i.i_first_direct_byte - 1;
191     
192             /* p_s_key_to_search->k_offset -  MAX_ITEM_LEN(p_s_sb->s_blocksize); */
193           if (search_for_position_by_key (p_s_sb, &unf_key_to_search, p_unf_search_path, &n_pos_in_item) == POSITION_FOUND)
194             {
195               p_s_ih = B_N_PITEM_HEAD(p_indirect_item_bh = PATH_PLAST_BUFFER(p_unf_search_path), PATH_LAST_POSITION(p_unf_search_path));
196               return (B_I_POS_UNFM_POINTER(p_indirect_item_bh, p_s_ih, n_pos_in_item));
197             }
198          /*  else */
199           printk("reiser-1800: search for unformatted node failed, p_s_key_to_search->k_offset = %u,  unf_key_to_search.k_offset = %u, MAX_ITEM_LEN(p_s_sb->s_blocksize) = %ld, debug this\n", p_s_key_to_search->k_offset, unf_key_to_search.k_offset,  MAX_ITEM_LEN(p_s_sb->s_blocksize) );
200           print_buffer_head(PATH_PLAST_BUFFER(p_unf_search_path), "the buffer holding the item before the key we failed to find");
201           print_block_head(PATH_PLAST_BUFFER(p_unf_search_path), "the block head");
202           return 0;                         /* keeps the compiler quiet */
203     }
204     
205     
206                                     /* hasn't been out of disk space tested  */
207     /* The function is NOT SCHEDULE-SAFE! */
208     static int get_buffer_near_last_unf ( struct super_block * p_s_sb, struct key * p_s_key_to_search,
209                                                      struct inode *  p_s_inode,  struct buffer_head * p_s_bh, 
210                                                      struct buffer_head ** pp_s_un_bh, struct path * p_s_search_path)
211     {
212       int unf_blocknr = 0, /* blocknr from which we start search for a free block for an unformatted node, if 0
213                               then we didn't find an unformatted node though we might have found a file hole */
214           n_repeat = CARRY_ON;
215       struct key unf_key_to_search;
216       struct path unf_search_path;
217     
218       copy_key(&unf_key_to_search,p_s_key_to_search);
219       unf_key_to_search.k_uniqueness = TYPE_INDIRECT;
220       
221       if (
222           (p_s_inode->u.reiserfs_i.i_first_direct_byte > 4095) /* i_first_direct_byte gets used for all sorts of
223                                                                   crap other than what the name indicates, thus
224                                                                   testing to see if it is 0 is not enough */
225           && (p_s_inode->u.reiserfs_i.i_first_direct_byte < MAX_KEY_OFFSET) /* if there is no direct item then
226                                                                                i_first_direct_byte = MAX_KEY_OFFSET */
227           )
228         {
229                                     /* actually, we don't want the last unformatted node, we want the last unformatted node
230                                        which is before the current file offset */
231           unf_key_to_search.k_offset = ((p_s_inode->u.reiserfs_i.i_first_direct_byte -1) < unf_key_to_search.k_offset) ? p_s_inode->u.reiserfs_i.i_first_direct_byte -1 :  unf_key_to_search.k_offset;
232     
233           while (unf_key_to_search.k_offset > -1)
234             {
235                                     /* This is our poorly documented way of initializing paths. -Hans */
236               init_path (&unf_search_path);
237                                     /* get the blocknr from which we start the search for a free block. */
238               unf_blocknr = get_last_unformatted_node_blocknr_of_file(  p_s_key_to_search, /* assumes this points to the file tail */
239                                                                         p_s_sb,     /* lets us figure out the block size */
240                                                                         p_s_bh, /* if there is no unformatted node in the file,
241                                                                                    then it returns p_s_bh->b_blocknr */
242                                                                         &unf_search_path,
243                                                                         p_s_inode
244                                                                         );
245     /*        printk("in while loop: unf_blocknr = %d,  *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */
246               if (unf_blocknr) 
247                 break;
248               else                  /* release the path and search again, this could be really slow for huge
249                                        holes.....better to spend the coding time adding compression though.... -Hans */
250                 {
251                                     /* Vladimir, is it a problem that I don't brelse these buffers ?-Hans */
252                   decrement_counters_in_path(&unf_search_path);
253                   unf_key_to_search.k_offset -= 4096;
254                 }
255             }
256           if (unf_blocknr) {
257             n_repeat |= get_new_buffer_near_blocknr(p_s_sb, unf_blocknr, pp_s_un_bh, p_s_search_path);
258           }
259           else {                    /* all unformatted nodes are holes */
260             n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path); 
261           }
262         }
263       else {                        /* file has no unformatted nodes */
264         n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path);
265     /*     printk("in else: unf_blocknr = %d,  *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */
266     /*     print_path (0,  p_s_search_path); */
267       }
268     
269       return n_repeat;
270     }
271     
272     #endif /* NEW_GET_NEW_BUFFER */
273     
274     
275     #ifdef OLD_GET_NEW_BUFFER
276     
277     /* The function is NOT SCHEDULE-SAFE! */
278     int get_new_buffer(
279     		   struct reiserfs_transaction_handle *th, 
280     		   struct buffer_head *  p_s_bh,
281     		   struct buffer_head ** pp_s_new_bh,
282     		   struct path	       * p_s_path
283     		   ) {
284       unsigned	long n_new_blocknumber = 0;
285       int		n_repeat;
286       struct super_block *	 p_s_sb = th->t_super;
287     
288       if ( (n_repeat = reiserfs_new_unf_blocknrs (th, &n_new_blocknumber, p_s_bh->b_blocknr)) == NO_DISK_SPACE )
289         return NO_DISK_SPACE;
290       
291       *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize);
292       if (atomic_read (&(*pp_s_new_bh)->b_count) > 1) {
293         /* Free path buffers to prevent deadlock which can occur in the
294            situation like : this process holds p_s_path; Block
295            (*pp_s_new_bh)->b_blocknr is on the path p_s_path, but it is
296            not necessary, that *pp_s_new_bh is in the tree; process 2
297            could remove it from the tree and freed block
298            (*pp_s_new_bh)->b_blocknr. Reiserfs_new_blocknrs in above
299            returns block (*pp_s_new_bh)->b_blocknr. Reiserfs_getblk gets
300            buffer for it, and it has b_count > 1. If we now will simply
301            wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get into an
302            endless loop, as nobody will release this buffer and the
303            current process holds buffer twice. That is why we do
304            decrement_counters_in_path(p_s_path) before waiting until
305            b_count becomes 1. (it there were other processes holding node
306            pp_s_new_bh, then eventually we will get a moment, when all of
307            them released a buffer). */
308         decrement_counters_in_path(p_s_path);
309         wait_buffer_until_released (*pp_s_new_bh);
310         n_repeat |= SCHEDULE_OCCURRED;
311       }
312     
313     #ifdef CONFIG_REISERFS_CHECK
314       if ( atomic_read (&((*pp_s_new_bh)->b_count)) != 1 || buffer_dirty (*pp_s_new_bh)) {
315         reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not free or dirty buffer %b for the new block",
316     		   *pp_s_new_bh);
317       }
318     #endif
319     
320       return n_repeat;
321     }
322     
323     #endif /* OLD_GET_NEW_BUFFER */
324     
325     
326     #ifdef GET_MANY_BLOCKNRS
327                                     /* code not yet functional */
328     get_next_blocknr (
329                       unsigned long *       p_blocknr_array,          /* we get a whole bunch of blocknrs all at once for
330                                                                          the write.  This is better than getting them one at
331                                                                          a time.  */
332                       unsigned long **      p_blocknr_index,        /* pointer to current offset into the array. */
333                       unsigned long        blocknr_array_length
334     )
335     {
336       unsigned long return_value;
337     
338       if (*p_blocknr_index < p_blocknr_array + blocknr_array_length) {
339         return_value = **p_blocknr_index;
340         **p_blocknr_index = 0;
341         *p_blocknr_index++;
342         return (return_value);
343       }
344       else
345         {
346           kfree (p_blocknr_array);
347         }
348     }
349     #endif /* GET_MANY_BLOCKNRS */
350     
351