File: /usr/src/linux/fs/reiserfs/fix_node.c
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 /**
6 ** old_item_num
7 ** old_entry_num
8 ** set_entry_sizes
9 ** create_virtual_node
10 ** check_left
11 ** check_right
12 ** directory_part_size
13 ** get_num_ver
14 ** set_parameters
15 ** is_leaf_removable
16 ** are_leaves_removable
17 ** get_empty_nodes
18 ** get_lfree
19 ** get_rfree
20 ** is_left_neighbor_in_cache
21 ** decrement_key
22 ** get_far_parent
23 ** get_parents
24 ** can_node_be_removed
25 ** ip_check_balance
26 ** dc_check_balance_internal
27 ** dc_check_balance_leaf
28 ** dc_check_balance
29 ** check_balance
30 ** get_direct_parent
31 ** get_neighbors
32 ** fix_nodes
33 **
34 **
35 **/
36
37
38 #include <linux/config.h>
39 #include <linux/sched.h>
40 #include <linux/string.h>
41 #include <linux/locks.h>
42 #include <linux/reiserfs_fs.h>
43
44
45 /* To make any changes in the tree we find a node, that contains item
46 to be changed/deleted or position in the node we insert a new item
47 to. We call this node S. To do balancing we need to decide what we
48 will shift to left/right neighbor, or to a new node, where new item
49 will be etc. To make this analysis simpler we build virtual
50 node. Virtual node is an array of items, that will replace items of
51 node S. (For instance if we are going to delete an item, virtual
52 node does not contain it). Virtual node keeps information about
53 item sizes and types, mergeability of first and last items, sizes
54 of all entries in directory item. We use this array of items when
55 calculating what we can shift to neighbors and how many nodes we
56 have to have if we do not any shiftings, if we shift to left/right
57 neighbor or to both. */
58
59
60 /* taking item number in virtual node, returns number of item, that it has in source buffer */
61 static inline int old_item_num (int new_num, int affected_item_num, int mode)
62 {
63 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
64 return new_num;
65
66 if (mode == M_INSERT) {
67
68 #ifdef CONFIG_REISERFS_CHECK
69 if (new_num == 0)
70 reiserfs_panic (0,"vs-8005: old_item_num: for INSERT mode and item number of inserted item");
71 #endif
72
73 return new_num - 1;
74 }
75
76 #ifdef CONFIG_REISERFS_CHECK
77 if (mode != M_DELETE)
78 reiserfs_panic (0, "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode);
79 #endif
80
81 /* delete mode */
82 return new_num + 1;
83 }
84
85 static void create_virtual_node (struct tree_balance * tb, int h)
86 {
87 struct item_head * ih;
88 struct virtual_node * vn = tb->tb_vn;
89 int new_num;
90 struct buffer_head * Sh; /* this comes from tb->S[h] */
91
92 Sh = PATH_H_PBUFFER (tb->tb_path, h);
93
94 /* size of changed node */
95 vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h];
96
97 /* for internal nodes array if virtual items is not created */
98 if (h) {
99 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
100 return;
101 }
102
103 /* number of items in virtual node */
104 vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0);
105
106 /* first virtual item */
107 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
108 memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item));
109 vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item);
110
111
112 /* first item in the node */
113 ih = B_N_PITEM_HEAD (Sh, 0);
114
115 /* define the mergeability for 0-th item (if it is not being deleted) */
116 if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
117 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
118
119 /* go through all items those remain in the virtual node (except for the new (inserted) one) */
120 for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) {
121 int j;
122 struct virtual_item * vi = vn->vn_vi + new_num;
123 int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1);
124
125
126 if (is_affected && vn->vn_mode == M_INSERT)
127 continue;
128
129 /* get item number in source node */
130 j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode);
131
132 vi->vi_item_len += ih[j].ih_item_len + IH_SIZE;
133 vi->vi_ih = ih + j;
134 vi->vi_item = B_I_PITEM (Sh, ih + j);
135 vi->vi_uarea = vn->vn_free_ptr;
136
137 // FIXME: there is no check, that item operation did not
138 // consume too much memory
139 vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]);
140 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
141 reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: "
142 "virtual node space consumed");
143
144 if (!is_affected)
145 /* this is not being changed */
146 continue;
147
148 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
149 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
150 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
151 }
152 }
153
154
155 /* virtual inserted item is not defined yet */
156 if (vn->vn_mode == M_INSERT) {
157 struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num;
158
159 #ifdef CONFIG_REISERFS_CHECK
160 if (vn->vn_ins_ih == 0)
161 reiserfs_panic (0, "vs-8040: create_virtual_node: item header of inserted item is not specified");
162 #endif
163
164 vi->vi_item_len = tb->insert_size[0];
165 vi->vi_ih = vn->vn_ins_ih;
166 vi->vi_item = vn->vn_data;
167 vi->vi_uarea = vn->vn_free_ptr;
168
169 op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]);
170 #if 0
171 switch (type/*le_key_k_type (ih_version (vn->vn_ins_ih), &(vn->vn_ins_ih->ih_key))*/) {
172 case TYPE_STAT_DATA:
173 vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_STAT_DATA;
174 break;
175 case TYPE_DIRECT:
176 vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_DIRECT;
177 break;
178 case TYPE_INDIRECT:
179 vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_INDIRECT;
180 break;
181 default:
182 /* inseted item is directory (it must be item with "." and "..") */
183 vn->vn_vi[vn->vn_affected_item_num].vi_type |=
184 (VI_TYPE_DIRECTORY | VI_TYPE_FIRST_DIRECTORY_ITEM | VI_TYPE_INSERTED_DIRECTORY_ITEM);
185
186 /* this directory item can not be split, so do not set sizes of entries */
187 break;
188 }
189 #endif
190 }
191
192 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
193 if (tb->CFR[0]) {
194 struct key * key;
195
196 key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]);
197 if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE ||
198 vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1))
199 vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE;
200
201 #ifdef CONFIG_REISERFS_CHECK
202 if (op_is_left_mergeable (key, Sh->b_size) &&
203 !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) {
204 /* we delete last item and it could be merged with right neighbor's first item */
205 if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) &&
206 I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) {
207 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
208 print_block (Sh, 0, -1, -1);
209 reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
210 key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE);
211 } else
212 /* we can delete directory item, that has only one directory entry in it */
213 ;
214 }
215 #endif
216
217 }
218 }
219
220
221 /* using virtual node check, how many items can be shifted to left
222 neighbor */
223 static void check_left (struct tree_balance * tb, int h, int cur_free)
224 {
225 int i;
226 struct virtual_node * vn = tb->tb_vn;
227 struct virtual_item * vi;
228 int d_size, ih_size;
229
230 #ifdef CONFIG_REISERFS_CHECK
231 if (cur_free < 0)
232 reiserfs_panic (0, "vs-8050: check_left: cur_free (%d) < 0", cur_free);
233 #endif
234
235 /* internal level */
236 if (h > 0) {
237 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
238 return;
239 }
240
241 /* leaf level */
242
243 if (!cur_free || !vn->vn_nr_item) {
244 /* no free space or nothing to move */
245 tb->lnum[h] = 0;
246 tb->lbytes = -1;
247 return;
248 }
249
250 #ifdef CONFIG_REISERFS_CHECK
251 if (!PATH_H_PPARENT (tb->tb_path, 0))
252 reiserfs_panic (0, "vs-8055: check_left: parent does not exist or invalid");
253 #endif
254
255 vi = vn->vn_vi;
256 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
257 /* all contents of S[0] fits into L[0] */
258
259 #ifdef CONFIG_REISERFS_CHECK
260 if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE)
261 reiserfs_panic (0, "vs-8055: check_left: invalid mode or balance condition failed");
262 #endif
263
264 tb->lnum[0] = vn->vn_nr_item;
265 tb->lbytes = -1;
266 return;
267 }
268
269
270 d_size = 0, ih_size = IH_SIZE;
271
272 /* first item may be merge with last item in left neighbor */
273 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
274 d_size = -((int)IH_SIZE), ih_size = 0;
275
276 tb->lnum[0] = 0;
277 for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) {
278 d_size += vi->vi_item_len;
279 if (cur_free >= d_size) {
280 /* the item can be shifted entirely */
281 cur_free -= d_size;
282 tb->lnum[0] ++;
283 continue;
284 }
285
286 /* the item cannot be shifted entirely, try to split it */
287 /* check whether L[0] can hold ih and at least one byte of the item body */
288 if (cur_free <= ih_size) {
289 /* cannot shift even a part of the current item */
290 tb->lbytes = -1;
291 return;
292 }
293 cur_free -= ih_size;
294
295 tb->lbytes = op_check_left (vi, cur_free, 0, 0);
296 if (tb->lbytes != -1)
297 /* count partially shifted item */
298 tb->lnum[0] ++;
299
300 break;
301 }
302
303 return;
304 }
305
306
307 /* using virtual node check, how many items can be shifted to right
308 neighbor */
309 static void check_right (struct tree_balance * tb, int h, int cur_free)
310 {
311 int i;
312 struct virtual_node * vn = tb->tb_vn;
313 struct virtual_item * vi;
314 int d_size, ih_size;
315
316 #ifdef CONFIG_REISERFS_CHECK
317 if (cur_free < 0)
318 reiserfs_panic (tb->tb_sb, "vs-8070: check_right: cur_free < 0");
319 #endif
320
321 /* internal level */
322 if (h > 0) {
323 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
324 return;
325 }
326
327 /* leaf level */
328
329 if (!cur_free || !vn->vn_nr_item) {
330 /* no free space */
331 tb->rnum[h] = 0;
332 tb->rbytes = -1;
333 return;
334 }
335
336 #ifdef CONFIG_REISERFS_CHECK
337 if (!PATH_H_PPARENT (tb->tb_path, 0))
338 reiserfs_panic (tb->tb_sb, "vs-8075: check_right: parent does not exist or invalid");
339 #endif
340
341 vi = vn->vn_vi + vn->vn_nr_item - 1;
342 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
343 /* all contents of S[0] fits into R[0] */
344
345 #ifdef CONFIG_REISERFS_CHECK
346 if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE)
347 reiserfs_panic (tb->tb_sb, "vs-8080: check_right: invalid mode or balance condition failed");
348 #endif
349
350 tb->rnum[h] = vn->vn_nr_item;
351 tb->rbytes = -1;
352 return;
353 }
354
355 d_size = 0, ih_size = IH_SIZE;
356
357 /* last item may be merge with first item in right neighbor */
358 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
359 d_size = -(int)IH_SIZE, ih_size = 0;
360
361 tb->rnum[0] = 0;
362 for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) {
363 d_size += vi->vi_item_len;
364 if (cur_free >= d_size) {
365 /* the item can be shifted entirely */
366 cur_free -= d_size;
367 tb->rnum[0] ++;
368 continue;
369 }
370
371 /* check whether R[0] can hold ih and at least one byte of the item body */
372 if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */
373 tb->rbytes = -1;
374 return;
375 }
376
377 /* R[0] can hold the header of the item and at least one byte of its body */
378 cur_free -= ih_size; /* cur_free is still > 0 */
379
380 tb->rbytes = op_check_right (vi, cur_free);
381 if (tb->rbytes != -1)
382 /* count partially shifted item */
383 tb->rnum[0] ++;
384
385 break;
386 }
387
388 return;
389 }
390
391
392 /*
393 * from - number of items, which are shifted to left neighbor entirely
394 * to - number of item, which are shifted to right neighbor entirely
395 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
396 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
397 static int get_num_ver (int mode, struct tree_balance * tb, int h,
398 int from, int from_bytes,
399 int to, int to_bytes,
400 short * snum012, int flow
401 )
402 {
403 int i;
404 int cur_free;
405 // int bytes;
406 int units;
407 struct virtual_node * vn = tb->tb_vn;
408 // struct virtual_item * vi;
409
410 int total_node_size, max_node_size, current_item_size;
411 int needed_nodes;
412 int start_item, /* position of item we start filling node from */
413 end_item, /* position of item we finish filling node by */
414 start_bytes,/* number of first bytes (entries for directory) of start_item-th item
415 we do not include into node that is being filled */
416 end_bytes; /* number of last bytes (entries for directory) of end_item-th item
417 we do node include into node that is being filled */
418 int split_item_positions[2]; /* these are positions in virtual item of
419 items, that are split between S[0] and
420 S1new and S1new and S2new */
421
422 split_item_positions[0] = -1;
423 split_item_positions[1] = -1;
424
425 #ifdef CONFIG_REISERFS_CHECK
426 /* We only create additional nodes if we are in insert or paste mode
427 or we are in replace mode at the internal level. If h is 0 and
428 the mode is M_REPLACE then in fix_nodes we change the mode to
429 paste or insert before we get here in the code. */
430 if ( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE))
431 reiserfs_panic (0, "vs-8100: get_num_ver: insert_size < 0 in overflow");
432 #endif
433
434 max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h));
435
436 /* snum012 [0-2] - number of items, that lay
437 to S[0], first new node and second new node */
438 snum012[3] = -1; /* s1bytes */
439 snum012[4] = -1; /* s2bytes */
440
441 /* internal level */
442 if (h > 0) {
443 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
444 if (i == max_node_size)
445 return 1;
446 return (i / max_node_size + 1);
447 }
448
449 /* leaf level */
450 needed_nodes = 1;
451 total_node_size = 0;
452 cur_free = max_node_size;
453
454 // start from 'from'-th item
455 start_item = from;
456 // skip its first 'start_bytes' units
457 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
458
459 // last included item is the 'end_item'-th one
460 end_item = vn->vn_nr_item - to - 1;
461 // do not count last 'end_bytes' units of 'end_item'-th item
462 end_bytes = (to_bytes != -1) ? to_bytes : 0;
463
464 /* go through all item beginning from the start_item-th item and ending by
465 the end_item-th item. Do not count first 'start_bytes' units of
466 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
467
468 for (i = start_item; i <= end_item; i ++) {
469 struct virtual_item * vi = vn->vn_vi + i;
470 int skip_from_end = ((i == end_item) ? end_bytes : 0);
471
472 #ifdef CONFIG_REISERFS_CHECK
473 if (needed_nodes > 3) {
474 reiserfs_panic (tb->tb_sb, "vs-8105: get_num_ver: too many nodes are needed");
475 }
476 #endif
477
478 /* get size of current item */
479 current_item_size = vi->vi_item_len;
480
481 /* do not take in calculation head part (from_bytes) of from-th item */
482 current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes);
483
484 /* do not take in calculation tail part of last item */
485 current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end);
486
487 /* if item fits into current node entierly */
488 if (total_node_size + current_item_size <= max_node_size) {
489 snum012[needed_nodes - 1] ++;
490 total_node_size += current_item_size;
491 start_bytes = 0;
492 continue;
493 }
494
495 if (current_item_size > max_node_size) {
496 /* virtual item length is longer, than max size of item in
497 a node. It is impossible for direct item */
498 #ifdef CONFIG_REISERFS_CHECK
499 if (is_direct_le_ih (vi->vi_ih))
500 reiserfs_panic (tb->tb_sb, "vs-8110: get_num_ver: "
501 "direct item length is %d. It can not be longer than %d",
502 current_item_size, max_node_size);
503 #endif
504 /* we will try to split it */
505 flow = 1;
506 }
507
508 if (!flow) {
509 /* as we do not split items, take new node and continue */
510 needed_nodes ++; i --; total_node_size = 0;
511 continue;
512 }
513
514 // calculate number of item units which fit into node being
515 // filled
516 {
517 int free_space;
518
519 free_space = max_node_size - total_node_size - IH_SIZE;
520 units = op_check_left (vi, free_space, start_bytes, skip_from_end);
521 if (units == -1) {
522 /* nothing fits into current node, take new node and continue */
523 needed_nodes ++, i--, total_node_size = 0;
524 continue;
525 }
526 }
527
528 /* something fits into the current node */
529 //if (snum012[3] != -1 || needed_nodes != 1)
530 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
531 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
532 start_bytes += units;
533 snum012[needed_nodes - 1 + 3] = units;
534
535 if (needed_nodes > 2)
536 reiserfs_warning ("vs-8111: get_num_ver: split_item_position is out of boundary\n");
537 snum012[needed_nodes - 1] ++;
538 split_item_positions[needed_nodes - 1] = i;
539 needed_nodes ++;
540 /* continue from the same item with start_bytes != -1 */
541 start_item = i;
542 i --;
543 total_node_size = 0;
544 }
545
546 // sum012[4] (if it is not -1) contains number of units of which
547 // are to be in S1new, snum012[3] - to be in S0. They are supposed
548 // to be S1bytes and S2bytes correspondingly, so recalculate
549 if (snum012[4] > 0) {
550 int split_item_num;
551 int bytes_to_r, bytes_to_l;
552 int bytes_to_S1new;
553
554 split_item_num = split_item_positions[1];
555 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
556 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
557 bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? snum012[3] : 0);
558
559 // s2bytes
560 snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new;
561
562 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY)
563 reiserfs_warning ("vs-8115: get_num_ver: not directory item\n");
564 }
565
566 /* now we know S2bytes, calculate S1bytes */
567 if (snum012[3] > 0) {
568 int split_item_num;
569 int bytes_to_r, bytes_to_l;
570 int bytes_to_S2new;
571
572 split_item_num = split_item_positions[0];
573 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
574 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
575 bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0);
576
577 // s1bytes
578 snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new;
579 }
580
581 return needed_nodes;
582 }
583
584
585 #ifdef CONFIG_REISERFS_CHECK
586 extern struct tree_balance * cur_tb;
587 #endif
588
589
590 /* Set parameters for balancing.
591 * Performs write of results of analysis of balancing into structure tb,
592 * where it will later be used by the functions that actually do the balancing.
593 * Parameters:
594 * tb tree_balance structure;
595 * h current level of the node;
596 * lnum number of items from S[h] that must be shifted to L[h];
597 * rnum number of items from S[h] that must be shifted to R[h];
598 * blk_num number of blocks that S[h] will be splitted into;
599 * s012 number of items that fall into splitted nodes.
600 * lbytes number of bytes which flow to the left neighbor from the item that is not
601 * not shifted entirely
602 * rbytes number of bytes which flow to the right neighbor from the item that is not
603 * not shifted entirely
604 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array)
605 */
606
607 static void set_parameters (struct tree_balance * tb, int h, int lnum,
608 int rnum, int blk_num, short * s012, int lb, int rb)
609 {
610
611 tb->lnum[h] = lnum;
612 tb->rnum[h] = rnum;
613 tb->blknum[h] = blk_num;
614
615 if (h == 0)
616 { /* only for leaf level */
617 if (s012 != NULL)
618 {
619 tb->s0num = * s012 ++,
620 tb->s1num = * s012 ++,
621 tb->s2num = * s012 ++;
622 tb->s1bytes = * s012 ++;
623 tb->s2bytes = * s012;
624 }
625 tb->lbytes = lb;
626 tb->rbytes = rb;
627 }
628 }
629
630
631
632 /* check, does node disappear if we shift tb->lnum[0] items to left
633 neighbor and tb->rnum[0] to the right one. */
634 static int is_leaf_removable (struct tree_balance * tb)
635 {
636 struct virtual_node * vn = tb->tb_vn;
637 int to_left, to_right;
638 int size;
639 int remain_items;
640
641 /* number of items, that will be shifted to left (right) neighbor
642 entirely */
643 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
644 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
645 remain_items = vn->vn_nr_item;
646
647 /* how many items remain in S[0] after shiftings to neighbors */
648 remain_items -= (to_left + to_right);
649
650 if (remain_items < 1) {
651 /* all content of node can be shifted to neighbors */
652 set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1);
653 return 1;
654 }
655
656 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
657 /* S[0] is not removable */
658 return 0;
659
660 /* check, whether we can divide 1 remaining item between neighbors */
661
662 /* get size of remaining item (in item units) */
663 size = op_unit_num (&(vn->vn_vi[to_left]));
664
665 if (tb->lbytes + tb->rbytes >= size) {
666 set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1);
667 return 1;
668 }
669
670 return 0;
671 }
672
673
674 /* check whether L, S, R can be joined in one node */
675 static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree)
676 {
677 struct virtual_node * vn = tb->tb_vn;
678 int ih_size;
679 struct buffer_head *S0;
680
681 S0 = PATH_H_PBUFFER (tb->tb_path, 0);
682
683 ih_size = 0;
684 if (vn->vn_nr_item) {
685 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
686 ih_size += IH_SIZE;
687
688 if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE)
689 ih_size += IH_SIZE;
690 } else {
691 /* there was only one item and it will be deleted */
692 struct item_head * ih;
693
694 #ifdef CONFIG_REISERFS_CHECK
695 if (B_NR_ITEMS (S0) != 1)
696 reiserfs_panic (0, "vs-8125: are_leaves_removable: item number must be 1: it is %d", B_NR_ITEMS(S0));
697 #endif
698
699 ih = B_N_PITEM_HEAD (S0, 0);
700 if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0])))
701 if (is_direntry_le_ih (ih)) {
702 /* Directory must be in correct state here: that is
703 somewhere at the left side should exist first directory
704 item. But the item being deleted can not be that first
705 one because its right neighbor is item of the same
706 directory. (But first item always gets deleted in last
707 turn). So, neighbors of deleted item can be merged, so
708 we can save ih_size */
709 ih_size = IH_SIZE;
710
711 #ifdef CONFIG_REISERFS_CHECK
712 /* we might check that left neighbor exists and is of the
713 same directory */
714 if (le_key_k_offset (ih_version (ih), &(ih->ih_key)) == DOT_OFFSET)
715 reiserfs_panic (tb->tb_sb, "vs-8130: are_leaves_removable: "
716 "first directory item can not be removed until directory is not empty");
717 #endif
718 }
719
720 }
721
722 if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) {
723 set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1);
724 return 1;
725 }
726 return 0;
727
728 }
729
730
731
732 /* when we do not split item, lnum and rnum are numbers of entire items */
733 #define SET_PAR_SHIFT_LEFT \
734 if (h)\
735 {\
736 int to_l;\
737 \
738 to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
739 (MAX_NR_KEY(Sh) + 1 - lpar);\
740 \
741 set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
742 }\
743 else \
744 {\
745 if (lset==LEFT_SHIFT_FLOW)\
746 set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
747 tb->lbytes, -1);\
748 else\
749 set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
750 -1, -1);\
751 }
752
753
754 #define SET_PAR_SHIFT_RIGHT \
755 if (h)\
756 {\
757 int to_r;\
758 \
759 to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
760 \
761 set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
762 }\
763 else \
764 {\
765 if (rset==RIGHT_SHIFT_FLOW)\
766 set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
767 -1, tb->rbytes);\
768 else\
769 set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
770 -1, -1);\
771 }
772
773
774 void free_buffers_in_tb (
775 struct tree_balance * p_s_tb
776 ) {
777 int n_counter;
778
779 decrement_counters_in_path(p_s_tb->tb_path);
780
781 for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) {
782 decrement_bcount(p_s_tb->L[n_counter]);
783 p_s_tb->L[n_counter] = NULL;
784 decrement_bcount(p_s_tb->R[n_counter]);
785 p_s_tb->R[n_counter] = NULL;
786 decrement_bcount(p_s_tb->FL[n_counter]);
787 p_s_tb->FL[n_counter] = NULL;
788 decrement_bcount(p_s_tb->FR[n_counter]);
789 p_s_tb->FR[n_counter] = NULL;
790 decrement_bcount(p_s_tb->CFL[n_counter]);
791 p_s_tb->CFL[n_counter] = NULL;
792 decrement_bcount(p_s_tb->CFR[n_counter]);
793 p_s_tb->CFR[n_counter] = NULL;
794 }
795 }
796
797
798 /* Get new buffers for storing new nodes that are created while balancing.
799 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
800 * CARRY_ON - schedule didn't occur while the function worked;
801 * NO_DISK_SPACE - no disk space.
802 */
803 /* The function is NOT SCHEDULE-SAFE! */
804 static int get_empty_nodes(
805 struct tree_balance * p_s_tb,
806 int n_h
807 ) {
808 struct buffer_head * p_s_new_bh,
809 * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h);
810 unsigned long * p_n_blocknr,
811 a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, };
812 int n_counter,
813 n_number_of_freeblk,
814 n_amount_needed,/* number of needed empty blocks */
815 n_retval = CARRY_ON;
816 struct super_block * p_s_sb = p_s_tb->tb_sb;
817
818
819 /* number_of_freeblk is the number of empty blocks which have been
820 acquired for use by the balancing algorithm minus the number of
821 empty blocks used in the previous levels of the analysis,
822 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
823 after empty blocks are acquired, and the balancing analysis is
824 then restarted, amount_needed is the number needed by this level
825 (n_h) of the balancing analysis.
826
827 Note that for systems with many processes writing, it would be
828 more layout optimal to calculate the total number needed by all
829 levels and then to run reiserfs_new_blocks to get all of them at once. */
830
831 /* Initiate number_of_freeblk to the amount acquired prior to the restart of
832 the analysis or 0 if not restarted, then subtract the amount needed
833 by all of the levels of the tree below n_h. */
834 /* blknum includes S[n_h], so we subtract 1 in this calculation */
835 for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ )
836 n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0;
837
838 /* Allocate missing empty blocks. */
839 /* if p_s_Sh == 0 then we are getting a new root */
840 n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1;
841 /* Amount_needed = the amount that we need more than the amount that we have. */
842 if ( n_amount_needed > n_number_of_freeblk )
843 n_amount_needed -= n_number_of_freeblk;
844 else /* If we have enough already then there is nothing to do. */
845 return CARRY_ON;
846
847 if ( reiserfs_new_blocknrs (p_s_tb->transaction_handle, a_n_blocknrs,
848 PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_blocknr, n_amount_needed) == NO_DISK_SPACE )
849 return NO_DISK_SPACE;
850
851 /* for each blocknumber we just got, get a buffer and stick it on FEB */
852 for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed;
853 p_n_blocknr++, n_counter++ ) {
854
855 #ifdef CONFIG_REISERFS_CHECK
856 if ( ! *p_n_blocknr )
857 reiserfs_panic(p_s_sb, "PAP-8135: get_empty_nodes: reiserfs_new_blocknrs failed when got new blocks");
858 #endif
859
860 p_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, *p_n_blocknr, p_s_sb->s_blocksize);
861 if (atomic_read (&(p_s_new_bh->b_count)) > 1) {
862 /*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/
863 /*
864 reiserfs_warning ("waiting for buffer %b, iput inode pid = %d, this pid %d, mode %c, %h\n",
865 p_s_new_bh, put_inode_pid, current->pid, p_s_tb->tb_vn->vn_mode, p_s_tb->tb_vn->vn_ins_ih);
866 print_tb (0, 0, 0, p_s_tb, "tb");
867 */
868 /*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/
869 if (atomic_read(&(p_s_new_bh->b_count)) > 2 ||
870 !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) {
871 n_retval = REPEAT_SEARCH ;
872 free_buffers_in_tb (p_s_tb);
873 wait_buffer_until_released (p_s_new_bh);
874 }
875 }
876 #ifdef CONFIG_REISERFS_CHECK
877 if (atomic_read (&(p_s_new_bh->b_count)) != 1 || buffer_dirty (p_s_new_bh)) {
878 if (atomic_read(&(p_s_new_bh->b_count)) > 2 ||
879 !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) {
880 reiserfs_panic(p_s_sb,"PAP-8140: get_empty_nodes: not free or dirty buffer %b for the new block",
881 p_s_new_bh);
882 }
883 }
884 #endif
885
886 /* Put empty buffers into the array. */
887 if (p_s_tb->FEB[p_s_tb->cur_blknum])
888 BUG();
889
890 mark_buffer_journal_new(p_s_new_bh) ;
891 p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
892 }
893
894 if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) )
895 n_retval = REPEAT_SEARCH ;
896
897 return n_retval;
898 }
899
900
901 /* Get free space of the left neighbor, which is stored in the parent
902 * node of the left neighbor. */
903 static int get_lfree (struct tree_balance * tb, int h)
904 {
905 struct buffer_head * l, * f;
906 int order;
907
908 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
909 return 0;
910
911 if (f == l)
912 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1;
913 else {
914 order = B_NR_ITEMS (l);
915 f = l;
916 }
917
918 return (MAX_CHILD_SIZE(f) - le16_to_cpu (B_N_CHILD(f,order)->dc_size));
919 }
920
921
922 /* Get free space of the right neighbor,
923 * which is stored in the parent node of the right neighbor.
924 */
925 static int get_rfree (struct tree_balance * tb, int h)
926 {
927 struct buffer_head * r, * f;
928 int order;
929
930 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
931 return 0;
932
933 if (f == r)
934 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1;
935 else {
936 order = 0;
937 f = r;
938 }
939
940 return (MAX_CHILD_SIZE(f) - B_N_CHILD(f,order)->dc_size);
941
942 }
943
944
945 /* Check whether left neighbor is in memory. */
946 static int is_left_neighbor_in_cache(
947 struct tree_balance * p_s_tb,
948 int n_h
949 ) {
950 struct buffer_head * p_s_father, * left;
951 struct super_block * p_s_sb = p_s_tb->tb_sb;
952 unsigned long n_left_neighbor_blocknr;
953 int n_left_neighbor_position;
954
955 if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */
956 return 0;
957
958 /* Calculate father of the node to be balanced. */
959 p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
960
961 #ifdef CONFIG_REISERFS_CHECK
962 if ( ! p_s_father || ! B_IS_IN_TREE (p_s_father) || ! B_IS_IN_TREE (p_s_tb->FL[n_h]) ||
963 ! buffer_uptodate (p_s_father) || ! buffer_uptodate (p_s_tb->FL[n_h]) ) {
964 reiserfs_panic (p_s_sb, "vs-8165: is_left_neighbor_in_cache: F[h] (%b) or FL[h] (%b) is invalid",
965 p_s_father, p_s_tb->FL[n_h]);
966 }
967 #endif
968
969
970 /* Get position of the pointer to the left neighbor into the left father. */
971 n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ?
972 p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]);
973 /* Get left neighbor block number. */
974 n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
975 /* Look for the left neighbor in the cache. */
976 if ( (left = get_hash_table(p_s_sb->s_dev, n_left_neighbor_blocknr, p_s_sb->s_blocksize)) ) {
977
978 #ifdef CONFIG_REISERFS_CHECK
979 if ( buffer_uptodate (left) && ! B_IS_IN_TREE(left) ) {
980 reiserfs_panic(p_s_sb, "vs-8170: is_left_neighbor_in_cache: left neighbor (%b %z) is not in the tree",
981 left, left);
982 }
983 #endif
984 put_bh(left) ;
985 return 1;
986 }
987
988 return 0;
989 }
990
991
992 #define LEFT_PARENTS 'l'
993 #define RIGHT_PARENTS 'r'
994
995
996 static void decrement_key (struct cpu_key * p_s_key)
997 {
998 // call item specific function for this key
999 item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key);
1000
1001
1002 #if 0 /* this works wrong when key is key of second part of tail: it
1003 sets key to be of indirect type. It looks like it makes no
1004 harm but it is unclear */
1005
1006 unsigned long * p_n_key_field = (unsigned long *)p_s_key + REISERFS_FULL_KEY_LEN - 1;
1007 int n_counter;
1008
1009 for( n_counter = 0; n_counter < REISERFS_FULL_KEY_LEN; n_counter++, p_n_key_field-- ) {
1010 if ( *p_n_key_field ) {
1011 (*p_n_key_field)--;
1012 break;
1013 }
1014 }
1015 #ifdef CONFIG_REISERFS_CHECK
1016 if ( n_counter == REISERFS_FULL_KEY_LEN )
1017 reiserfs_panic(NULL, "PAP-8175: decrement_key: zero key");
1018 #endif
1019
1020 #endif /*0*/
1021
1022 }
1023
1024
1025
1026
1027 /* Calculate far left/right parent of the left/right neighbor of the current node, that
1028 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
1029 * Calculate left/right common parent of the current node and L[h]/R[h].
1030 * Calculate left/right delimiting key position.
1031 * Returns: PATH_INCORRECT - path in the tree is not correct;
1032 SCHEDULE_OCCURRED - schedule occurred while the function worked;
1033 * CARRY_ON - schedule didn't occur while the function worked;
1034 */
1035 static int get_far_parent (struct tree_balance * p_s_tb,
1036 int n_h,
1037 struct buffer_head ** pp_s_father,
1038 struct buffer_head ** pp_s_com_father,
1039 char c_lr_par)
1040 {
1041 struct buffer_head * p_s_parent;
1042 INITIALIZE_PATH (s_path_to_neighbor_father);
1043 struct path * p_s_path = p_s_tb->tb_path;
1044 struct cpu_key s_lr_father_key;
1045 int n_counter,
1046 n_position = MAX_INT,
1047 n_first_last_position = 0,
1048 n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
1049
1050 /* Starting from F[n_h] go upwards in the tree, and look for the common
1051 ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
1052
1053 n_counter = n_path_offset;
1054
1055 #ifdef CONFIG_REISERFS_CHECK
1056 if ( n_counter < FIRST_PATH_ELEMENT_OFFSET )
1057 reiserfs_panic(p_s_tb->tb_sb, "PAP-8180: get_far_parent: invalid path length");
1058 #endif
1059
1060
1061 for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) {
1062 /* Check whether parent of the current buffer in the path is really parent in the tree. */
1063 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) )
1064 return REPEAT_SEARCH;
1065 /* Check whether position in the parent is correct. */
1066 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) )
1067 return REPEAT_SEARCH;
1068 /* Check whether parent at the path really points to the child. */
1069 if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
1070 PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr )
1071 return REPEAT_SEARCH;
1072 /* Return delimiting key if position in the parent is not equal to first/last one. */
1073 if ( c_lr_par == RIGHT_PARENTS )
1074 n_first_last_position = B_NR_ITEMS (p_s_parent);
1075 if ( n_position != n_first_last_position ) {
1076 *pp_s_com_father = p_s_parent;
1077 get_bh(*pp_s_com_father) ;
1078 /*(*pp_s_com_father = p_s_parent)->b_count++;*/
1079 break;
1080 }
1081 }
1082
1083 /* if we are in the root of the tree, then there is no common father */
1084 if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) {
1085 /* Check whether first buffer in the path is the root of the tree. */
1086 if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1087 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
1088 *pp_s_father = *pp_s_com_father = NULL;
1089 return CARRY_ON;
1090 }
1091 return REPEAT_SEARCH;
1092 }
1093
1094 #ifdef CONFIG_REISERFS_CHECK
1095 if ( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL ) {
1096 reiserfs_panic(p_s_tb->tb_sb, "PAP-8185: get_far_parent: (%b %z) level too small", *pp_s_com_father, *pp_s_com_father);
1097 }
1098 #endif
1099
1100 /* Check whether the common parent is locked. */
1101
1102 if ( buffer_locked (*pp_s_com_father) ) {
1103 __wait_on_buffer(*pp_s_com_father);
1104 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
1105 decrement_bcount(*pp_s_com_father);
1106 return REPEAT_SEARCH;
1107 }
1108 }
1109
1110 /* So, we got common parent of the current node and its left/right neighbor.
1111 Now we are geting the parent of the left/right neighbor. */
1112
1113 /* Form key to get parent of the left/right neighbor. */
1114 le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ?
1115 (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position)));
1116
1117
1118 if ( c_lr_par == LEFT_PARENTS )
1119 decrement_key(&s_lr_father_key);
1120
1121 if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR)
1122 // path is released
1123 return IO_ERROR;
1124
1125 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
1126 decrement_counters_in_path(&s_path_to_neighbor_father);
1127 decrement_bcount(*pp_s_com_father);
1128 return REPEAT_SEARCH;
1129 }
1130
1131 *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1132
1133 #ifdef CONFIG_REISERFS_CHECK
1134 if ( B_LEVEL (*pp_s_father) != n_h + 1 ) {
1135 reiserfs_panic(p_s_tb->tb_sb, "PAP-8190: get_far_parent: (%b %z) level too small", *pp_s_father, *pp_s_father);
1136 }
1137
1138 if ( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET )
1139 reiserfs_panic(0, "PAP-8192: get_far_parent: path length is too small");
1140
1141 #endif
1142
1143 s_path_to_neighbor_father.path_length--;
1144 decrement_counters_in_path(&s_path_to_neighbor_father);
1145 return CARRY_ON;
1146 }
1147
1148
1149 /* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
1150 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
1151 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
1152 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
1153 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1154 * CARRY_ON - schedule didn't occur while the function worked;
1155 */
1156 static int get_parents (struct tree_balance * p_s_tb, int n_h)
1157 {
1158 struct path * p_s_path = p_s_tb->tb_path;
1159 int n_position,
1160 n_ret_value,
1161 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1162 struct buffer_head * p_s_curf,
1163 * p_s_curcf;
1164
1165 /* Current node is the root of the tree or will be root of the tree */
1166 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {
1167 /* The root can not have parents.
1168 Release nodes which previously were obtained as parents of the current node neighbors. */
1169 decrement_bcount(p_s_tb->FL[n_h]);
1170 decrement_bcount(p_s_tb->CFL[n_h]);
1171 decrement_bcount(p_s_tb->FR[n_h]);
1172 decrement_bcount(p_s_tb->CFR[n_h]);
1173 p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL;
1174 return CARRY_ON;
1175 }
1176
1177 /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1178 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) {
1179 /* Current node is not the first child of its parent. */
1180 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
1181 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1182 get_bh(p_s_curf) ;
1183 get_bh(p_s_curf) ;
1184 p_s_tb->lkey[n_h] = n_position - 1;
1185 }
1186 else {
1187 /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1188 Calculate current common parent of L[n_path_offset] and the current node. Note that
1189 CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1190 Calculate lkey[n_path_offset]. */
1191 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1192 &p_s_curcf, LEFT_PARENTS)) != CARRY_ON )
1193 return n_ret_value;
1194 }
1195
1196 decrement_bcount(p_s_tb->FL[n_h]);
1197 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
1198 decrement_bcount(p_s_tb->CFL[n_h]);
1199 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */
1200
1201 #ifdef CONFIG_REISERFS_CHECK
1202 if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) {
1203 reiserfs_panic (p_s_tb->tb_sb, "PAP-8195: get_parents: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1204 }
1205 #endif
1206
1207 /* Get parent FR[n_h] of R[n_h]. */
1208
1209 /* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
1210 if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) {
1211 /* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
1212 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
1213 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
1214 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON )
1215 return n_ret_value;
1216 }
1217 else {
1218 /* Current node is not the last child of its parent F[n_h]. */
1219 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
1220 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1221 get_bh(p_s_curf) ;
1222 get_bh(p_s_curf) ;
1223 p_s_tb->rkey[n_h] = n_position;
1224 }
1225
1226 decrement_bcount(p_s_tb->FR[n_h]);
1227 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */
1228
1229 decrement_bcount(p_s_tb->CFR[n_h]);
1230 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */
1231
1232 #ifdef CONFIG_REISERFS_CHECK
1233 #if 0
1234 if (n_h == 0 && p_s_tb->CFR[n_h] && COMP_KEYS (B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)),
1235 B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h]))) {
1236 reiserfs_panic (p_s_tb->tb_sb, "PAP-8200: get_parents: rdkey in S0 %k, rdkey in CFR0 %k do not match",
1237 B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)), B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h]));
1238 }
1239 #endif
1240 if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) {
1241 reiserfs_panic (p_s_tb->tb_sb, "PAP-8205: get_parents: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1242 }
1243 #endif
1244
1245 return CARRY_ON;
1246 }
1247
1248
1249 /* it is possible to remove node as result of shiftings to
1250 neighbors even when we insert or paste item. */
1251 static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h)
1252 {
1253 struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h);
1254 int levbytes = tb->insert_size[h];
1255 struct item_head * ih;
1256 struct key * r_key = NULL;
1257
1258 ih = B_N_PITEM_HEAD (Sh, 0);
1259 if ( tb->CFR[h] )
1260 r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]);
1261
1262 if (
1263 lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1264 /* shifting may merge items which might save space */
1265 - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0)
1266 - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0)
1267 + (( h ) ? KEY_SIZE : 0))
1268 {
1269 /* node can not be removed */
1270 if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */
1271 if ( ! h )
1272 tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0);
1273 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1274 return NO_BALANCING_NEEDED;
1275 }
1276 }
1277 return !NO_BALANCING_NEEDED;
1278 }
1279
1280
1281
1282 /* Check whether current node S[h] is balanced when increasing its size by
1283 * Inserting or Pasting.
1284 * Calculate parameters for balancing for current level h.
1285 * Parameters:
1286 * tb tree_balance structure;
1287 * h current level of the node;
1288 * inum item number in S[h];
1289 * mode i - insert, p - paste;
1290 * Returns: 1 - schedule occurred;
1291 * 0 - balancing for higher levels needed;
1292 * -1 - no balancing for higher levels needed;
1293 * -2 - no disk space.
1294 */
1295 /* ip means Inserting or Pasting */
1296 static int ip_check_balance (struct tree_balance * tb, int h)
1297 {
1298 struct virtual_node * vn = tb->tb_vn;
1299 int levbytes, /* Number of bytes that must be inserted into (value
1300 is negative if bytes are deleted) buffer which
1301 contains node being balanced. The mnemonic is
1302 that the attempted change in node space used level
1303 is levbytes bytes. */
1304 n_ret_value;
1305
1306 int lfree, sfree, rfree /* free space in L, S and R */;
1307
1308 /* nver is short for number of vertixes, and lnver is the number if
1309 we shift to the left, rnver is the number if we shift to the
1310 right, and lrnver is the number if we shift in both directions.
1311 The goal is to minimize first the number of vertixes, and second,
1312 the number of vertixes whose contents are changed by shifting,
1313 and third the number of uncached vertixes whose contents are
1314 changed by shifting and must be read from disk. */
1315 int nver, lnver, rnver, lrnver;
1316
1317 /* used at leaf level only, S0 = S[0] is the node being balanced,
1318 sInum [ I = 0,1,2 ] is the number of items that will
1319 remain in node SI after balancing. S1 and S2 are new
1320 nodes that might be created. */
1321
1322 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters.
1323 where 4th parameter is s1bytes and 5th - s2bytes
1324 */
1325 short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases
1326 0,1 - do not shift and do not shift but bottle
1327 2 - shift only whole item to left
1328 3 - shift to left and bottle as much as possible
1329 4,5 - shift to right (whole items and as much as possible
1330 6,7 - shift to both directions (whole items and as much as possible)
1331 */
1332
1333 /* Sh is the node whose balance is currently being checked */
1334 struct buffer_head * Sh;
1335
1336 Sh = PATH_H_PBUFFER (tb->tb_path, h);
1337 levbytes = tb->insert_size[h];
1338
1339 /* Calculate balance parameters for creating new root. */
1340 if ( ! Sh ) {
1341 if ( ! h )
1342 reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0");
1343 switch ( n_ret_value = get_empty_nodes (tb, h) ) {
1344 case CARRY_ON:
1345 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1346 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1347
1348 case NO_DISK_SPACE:
1349 case REPEAT_SEARCH:
1350 return n_ret_value;
1351 default:
1352 reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
1353 }
1354 }
1355
1356 if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */
1357 return n_ret_value;
1358
1359 sfree = B_FREE_SPACE (Sh);
1360
1361 /* get free space of neighbors */
1362 rfree = get_rfree (tb, h);
1363 lfree = get_lfree (tb, h);
1364
1365 if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED)
1366 /* and new item fits into node S[h] without any shifting */
1367 return NO_BALANCING_NEEDED;
1368
1369 create_virtual_node (tb, h);
1370
1371 /*
1372 determine maximal number of items we can shift to the left neighbor (in tb structure)
1373 and the maximal number of bytes that can flow to the left neighbor
1374 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1375 */
1376 check_left (tb, h, lfree);
1377
1378 /*
1379 determine maximal number of items we can shift to the right neighbor (in tb structure)
1380 and the maximal number of bytes that can flow to the right neighbor
1381 from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1382 */
1383 check_right (tb, h, rfree);
1384
1385
1386 /* all contents of internal node S[h] can be moved into its
1387 neighbors, S[h] will be removed after balancing */
1388 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1389 int to_r;
1390
1391 /* Since we are working on internal nodes, and our internal
1392 nodes have fixed size entries, then we can balance by the
1393 number of items rather than the space they consume. In this
1394 routine we set the left node equal to the right node,
1395 allowing a difference of less than or equal to 1 child
1396 pointer. */
1397 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1398 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1399 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1400 return CARRY_ON;
1401 }
1402
1403 #ifdef CONFIG_REISERFS_CHECK
1404 /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1405 if ( h && ( tb->lnum[h] >= vn->vn_nr_item + 1 || tb->rnum[h] >= vn->vn_nr_item + 1) )
1406 reiserfs_panic (tb->tb_sb, "vs-8220: ip_check_balance: tree is not balanced on internal level");
1407
1408 if ( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1409 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) ))
1410 reiserfs_panic(tb->tb_sb, "vs-8225: ip_check_balance: tree is not balanced on leaf level");
1411 #endif
1412
1413 /* all contents of S[0] can be moved into its neighbors
1414 S[0] will be removed after balancing. */
1415 if (!h && is_leaf_removable (tb))
1416 return CARRY_ON;
1417
1418
1419 /* why do we perform this check here rather than earlier??
1420 Answer: we can win 1 node in some cases above. Moreover we
1421 checked it above, when we checked, that S[0] is not removable
1422 in principle */
1423 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1424 if ( ! h )
1425 tb->s0num = vn->vn_nr_item;
1426 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1427 return NO_BALANCING_NEEDED;
1428 }
1429
1430
1431 {
1432 int lpar, rpar, nset, lset, rset, lrset;
1433 /*
1434 * regular overflowing of the node
1435 */
1436
1437 /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1438 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1439 nset, lset, rset, lrset - shows, whether flowing items give better packing
1440 */
1441 #define FLOW 1
1442 #define NO_FLOW 0 /* do not any splitting */
1443
1444 /* we choose one the following */
1445 #define NOTHING_SHIFT_NO_FLOW 0
1446 #define NOTHING_SHIFT_FLOW 5
1447 #define LEFT_SHIFT_NO_FLOW 10
1448 #define LEFT_SHIFT_FLOW 15
1449 #define RIGHT_SHIFT_NO_FLOW 20
1450 #define RIGHT_SHIFT_FLOW 25
1451 #define LR_SHIFT_NO_FLOW 30
1452 #define LR_SHIFT_FLOW 35
1453
1454
1455 lpar = tb->lnum[h];
1456 rpar = tb->rnum[h];
1457
1458
1459 /* calculate number of blocks S[h] must be split into when
1460 nothing is shifted to the neighbors,
1461 as well as number of items in each part of the split node (s012 numbers),
1462 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
1463 nset = NOTHING_SHIFT_NO_FLOW;
1464 nver = get_num_ver (vn->vn_mode, tb, h,
1465 0, -1, h?vn->vn_nr_item:0, -1,
1466 snum012, NO_FLOW);
1467
1468 if (!h)
1469 {
1470 int nver1;
1471
1472 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1473 nver1 = get_num_ver (vn->vn_mode, tb, h,
1474 0, -1, 0, -1,
1475 snum012 + NOTHING_SHIFT_FLOW, FLOW);
1476 if (nver > nver1)
1477 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1478 }
1479
1480
1481 /* calculate number of blocks S[h] must be split into when
1482 l_shift_num first items and l_shift_bytes of the right most
1483 liquid item to be shifted are shifted to the left neighbor,
1484 as well as number of items in each part of the splitted node (s012 numbers),
1485 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1486 */
1487 lset = LEFT_SHIFT_NO_FLOW;
1488 lnver = get_num_ver (vn->vn_mode, tb, h,
1489 lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1,
1490 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1491 if (!h)
1492 {
1493 int lnver1;
1494
1495 lnver1 = get_num_ver (vn->vn_mode, tb, h,
1496 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, 0, -1,
1497 snum012 + LEFT_SHIFT_FLOW, FLOW);
1498 if (lnver > lnver1)
1499 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1500 }
1501
1502
1503 /* calculate number of blocks S[h] must be split into when
1504 r_shift_num first items and r_shift_bytes of the left most
1505 liquid item to be shifted are shifted to the right neighbor,
1506 as well as number of items in each part of the splitted node (s012 numbers),
1507 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1508 */
1509 rset = RIGHT_SHIFT_NO_FLOW;
1510 rnver = get_num_ver (vn->vn_mode, tb, h,
1511 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1,
1512 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1513 if (!h)
1514 {
1515 int rnver1;
1516
1517 rnver1 = get_num_ver (vn->vn_mode, tb, h,
1518 0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1519 snum012 + RIGHT_SHIFT_FLOW, FLOW);
1520
1521 if (rnver > rnver1)
1522 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1523 }
1524
1525
1526 /* calculate number of blocks S[h] must be split into when
1527 items are shifted in both directions,
1528 as well as number of items in each part of the splitted node (s012 numbers),
1529 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1530 */
1531 lrset = LR_SHIFT_NO_FLOW;
1532 lrnver = get_num_ver (vn->vn_mode, tb, h,
1533 lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1,
1534 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1535 if (!h)
1536 {
1537 int lrnver1;
1538
1539 lrnver1 = get_num_ver (vn->vn_mode, tb, h,
1540 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1541 snum012 + LR_SHIFT_FLOW, FLOW);
1542 if (lrnver > lrnver1)
1543 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1544 }
1545
1546
1547
1548 /* Our general shifting strategy is:
1549 1) to minimized number of new nodes;
1550 2) to minimized number of neighbors involved in shifting;
1551 3) to minimized number of disk reads; */
1552
1553 /* we can win TWO or ONE nodes by shifting in both directions */
1554 if (lrnver < lnver && lrnver < rnver)
1555 {
1556 #ifdef CONFIG_REISERFS_CHECK
1557 if (h && (tb->lnum[h] != 1 || tb->rnum[h] != 1 || lrnver != 1 || rnver != 2 || lnver != 2 || h != 1))
1558 reiserfs_panic (0, "vs-8230: check_balance: bad h");
1559 #endif
1560 if (lrset == LR_SHIFT_FLOW)
1561 set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset,
1562 tb->lbytes, tb->rbytes);
1563 else
1564 set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1),
1565 tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1);
1566
1567 return CARRY_ON;
1568 }
1569
1570 /* if shifting doesn't lead to better packing then don't shift */
1571 if (nver == lrnver)
1572 {
1573 set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1);
1574 return CARRY_ON;
1575 }
1576
1577
1578 /* now we know that for better packing shifting in only one
1579 direction either to the left or to the right is required */
1580
1581 /* if shifting to the left is better than shifting to the right */
1582 if (lnver < rnver)
1583 {
1584 SET_PAR_SHIFT_LEFT;
1585 return CARRY_ON;
1586 }
1587
1588 /* if shifting to the right is better than shifting to the left */
1589 if (lnver > rnver)
1590 {
1591 SET_PAR_SHIFT_RIGHT;
1592 return CARRY_ON;
1593 }
1594
1595
1596 /* now shifting in either direction gives the same number
1597 of nodes and we can make use of the cached neighbors */
1598 if (is_left_neighbor_in_cache (tb,h))
1599 {
1600 SET_PAR_SHIFT_LEFT;
1601 return CARRY_ON;
1602 }
1603
1604 /* shift to the right independently on whether the right neighbor in cache or not */
1605 SET_PAR_SHIFT_RIGHT;
1606 return CARRY_ON;
1607 }
1608 }
1609
1610
1611 /* Check whether current node S[h] is balanced when Decreasing its size by
1612 * Deleting or Cutting for INTERNAL node of S+tree.
1613 * Calculate parameters for balancing for current level h.
1614 * Parameters:
1615 * tb tree_balance structure;
1616 * h current level of the node;
1617 * inum item number in S[h];
1618 * mode i - insert, p - paste;
1619 * Returns: 1 - schedule occurred;
1620 * 0 - balancing for higher levels needed;
1621 * -1 - no balancing for higher levels needed;
1622 * -2 - no disk space.
1623 *
1624 * Note: Items of internal nodes have fixed size, so the balance condition for
1625 * the internal part of S+tree is as for the B-trees.
1626 */
1627 static int dc_check_balance_internal (struct tree_balance * tb, int h)
1628 {
1629 struct virtual_node * vn = tb->tb_vn;
1630
1631 /* Sh is the node whose balance is currently being checked,
1632 and Fh is its father. */
1633 struct buffer_head * Sh, * Fh;
1634 int maxsize,
1635 n_ret_value;
1636 int lfree, rfree /* free space in L and R */;
1637
1638 Sh = PATH_H_PBUFFER (tb->tb_path, h);
1639 Fh = PATH_H_PPARENT (tb->tb_path, h);
1640
1641 maxsize = MAX_CHILD_SIZE(Sh);
1642
1643 /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1644 /* new_nr_item = number of items node would have if operation is */
1645 /* performed without balancing (new_nr_item); */
1646 create_virtual_node (tb, h);
1647
1648 if ( ! Fh )
1649 { /* S[h] is the root. */
1650 if ( vn->vn_nr_item > 0 )
1651 {
1652 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1653 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1654 }
1655 /* new_nr_item == 0.
1656 * Current root will be deleted resulting in
1657 * decrementing the tree height. */
1658 set_parameters (tb, h, 0, 0, 0, NULL, -1, -1);
1659 return CARRY_ON;
1660 }
1661
1662 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
1663 return n_ret_value;
1664
1665
1666 /* get free space of neighbors */
1667 rfree = get_rfree (tb, h);
1668 lfree = get_lfree (tb, h);
1669
1670 /* determine maximal number of items we can fit into neighbors */
1671 check_left (tb, h, lfree);
1672 check_right (tb, h, rfree);
1673
1674
1675 if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) )
1676 { /* Balance condition for the internal node is valid.
1677 * In this case we balance only if it leads to better packing. */
1678 if ( vn->vn_nr_item == MIN_NR_KEY(Sh) )
1679 { /* Here we join S[h] with one of its neighbors,
1680 * which is impossible with greater values of new_nr_item. */
1681 if ( tb->lnum[h] >= vn->vn_nr_item + 1 )
1682 {
1683 /* All contents of S[h] can be moved to L[h]. */
1684 int n;
1685 int order_L;
1686
1687 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1688 n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE);
1689 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1690 return CARRY_ON;
1691 }
1692
1693 if ( tb->rnum[h] >= vn->vn_nr_item + 1 )
1694 {
1695 /* All contents of S[h] can be moved to R[h]. */
1696 int n;
1697 int order_R;
1698
1699 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1;
1700 n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE);
1701 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1702 return CARRY_ON;
1703 }
1704 }
1705
1706 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
1707 {
1708 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1709 int to_r;
1710
1711 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1712 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1713 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1714 return CARRY_ON;
1715 }
1716
1717 /* Balancing does not lead to better packing. */
1718 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1719 return NO_BALANCING_NEEDED;
1720 }
1721
1722 /* Current node contain insufficient number of items. Balancing is required. */
1723 /* Check whether we can merge S[h] with left neighbor. */
1724 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1725 if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h])
1726 {
1727 int n;
1728 int order_L;
1729
1730 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1731 n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE);
1732 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1733 return CARRY_ON;
1734 }
1735
1736 /* Check whether we can merge S[h] with right neighbor. */
1737 if (tb->rnum[h] >= vn->vn_nr_item + 1)
1738 {
1739 int n;
1740 int order_R;
1741
1742 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1743 n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE);
1744 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1745 return CARRY_ON;
1746 }
1747
1748 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1749 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
1750 {
1751 int to_r;
1752
1753 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1754 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1755 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1756 return CARRY_ON;
1757 }
1758
1759 /* For internal nodes try to borrow item from a neighbor */
1760 #ifdef CONFIG_REISERFS_CHECK
1761 if (!tb->FL[h] && !tb->FR[h])
1762 reiserfs_panic (0, "vs-8235: dc_check_balance_internal: trying to borrow for root");
1763 #endif
1764
1765 /* Borrow one or two items from caching neighbor */
1766 if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h])
1767 {
1768 int from_l;
1769
1770 from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1);
1771 set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1);
1772 return CARRY_ON;
1773 }
1774
1775 set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1,
1776 NULL, -1, -1);
1777 return CARRY_ON;
1778 }
1779
1780
1781 /* Check whether current node S[h] is balanced when Decreasing its size by
1782 * Deleting or Truncating for LEAF node of S+tree.
1783 * Calculate parameters for balancing for current level h.
1784 * Parameters:
1785 * tb tree_balance structure;
1786 * h current level of the node;
1787 * inum item number in S[h];
1788 * mode i - insert, p - paste;
1789 * Returns: 1 - schedule occurred;
1790 * 0 - balancing for higher levels needed;
1791 * -1 - no balancing for higher levels needed;
1792 * -2 - no disk space.
1793 */
1794 static int dc_check_balance_leaf (struct tree_balance * tb, int h)
1795 {
1796 struct virtual_node * vn = tb->tb_vn;
1797
1798 /* Number of bytes that must be deleted from
1799 (value is negative if bytes are deleted) buffer which
1800 contains node being balanced. The mnemonic is that the
1801 attempted change in node space used level is levbytes bytes. */
1802 int levbytes;
1803 /* the maximal item size */
1804 int maxsize,
1805 n_ret_value;
1806 /* S0 is the node whose balance is currently being checked,
1807 and F0 is its father. */
1808 struct buffer_head * S0, * F0;
1809 int lfree, rfree /* free space in L and R */;
1810
1811 S0 = PATH_H_PBUFFER (tb->tb_path, 0);
1812 F0 = PATH_H_PPARENT (tb->tb_path, 0);
1813
1814 levbytes = tb->insert_size[h];
1815
1816 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1817
1818 if ( ! F0 )
1819 { /* S[0] is the root now. */
1820
1821 #ifdef CONFIG_REISERFS_CHECK
1822 if ( -levbytes >= maxsize - B_FREE_SPACE (S0) )
1823 reiserfs_panic (tb->tb_sb, "vs-8240: dc_check_balance_leaf: attempt to create empty buffer tree");
1824 #endif
1825
1826 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1827 return NO_BALANCING_NEEDED;
1828 }
1829
1830 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
1831 return n_ret_value;
1832
1833 /* get free space of neighbors */
1834 rfree = get_rfree (tb, h);
1835 lfree = get_lfree (tb, h);
1836
1837 create_virtual_node (tb, h);
1838
1839 /* if 3 leaves can be merge to one, set parameters and return */
1840 if (are_leaves_removable (tb, lfree, rfree))
1841 return CARRY_ON;
1842
1843 /* determine maximal number of items we can shift to the left/right neighbor
1844 and the maximal number of bytes that can flow to the left/right neighbor
1845 from the left/right most liquid item that cannot be shifted from S[0] entirely
1846 */
1847 check_left (tb, h, lfree);
1848 check_right (tb, h, rfree);
1849
1850 /* check whether we can merge S with left neighbor. */
1851 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1852 if (is_left_neighbor_in_cache (tb,h) ||
1853 ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1854 !tb->FR[h]) {
1855
1856 #ifdef CONFIG_REISERFS_CHECK
1857 if (!tb->FL[h])
1858 reiserfs_panic (0, "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1859 #endif
1860
1861 /* set parameter to merge S[0] with its left neighbor */
1862 set_parameters (tb, h, -1, 0, 0, NULL, -1, -1);
1863 return CARRY_ON;
1864 }
1865
1866 /* check whether we can merge S[0] with right neighbor. */
1867 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1868 set_parameters (tb, h, 0, -1, 0, NULL, -1, -1);
1869 return CARRY_ON;
1870 }
1871
1872 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1873 if (is_leaf_removable (tb))
1874 return CARRY_ON;
1875
1876 /* Balancing is not required. */
1877 tb->s0num = vn->vn_nr_item;
1878 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1879 return NO_BALANCING_NEEDED;
1880 }
1881
1882
1883
1884 /* Check whether current node S[h] is balanced when Decreasing its size by
1885 * Deleting or Cutting.
1886 * Calculate parameters for balancing for current level h.
1887 * Parameters:
1888 * tb tree_balance structure;
1889 * h current level of the node;
1890 * inum item number in S[h];
1891 * mode d - delete, c - cut.
1892 * Returns: 1 - schedule occurred;
1893 * 0 - balancing for higher levels needed;
1894 * -1 - no balancing for higher levels needed;
1895 * -2 - no disk space.
1896 */
1897 static int dc_check_balance (struct tree_balance * tb, int h)
1898 {
1899
1900 #ifdef CONFIG_REISERFS_CHECK
1901 if ( ! (PATH_H_PBUFFER (tb->tb_path, h)) )
1902 reiserfs_panic(tb->tb_sb, "vs-8250: dc_check_balance: S is not initialized");
1903 #endif
1904
1905 if ( h )
1906 return dc_check_balance_internal (tb, h);
1907 else
1908 return dc_check_balance_leaf (tb, h);
1909 }
1910
1911
1912
1913 /* Check whether current node S[h] is balanced.
1914 * Calculate parameters for balancing for current level h.
1915 * Parameters:
1916 *
1917 * tb tree_balance structure:
1918 *
1919 * tb is a large structure that must be read about in the header file
1920 * at the same time as this procedure if the reader is to successfully
1921 * understand this procedure
1922 *
1923 * h current level of the node;
1924 * inum item number in S[h];
1925 * mode i - insert, p - paste, d - delete, c - cut.
1926 * Returns: 1 - schedule occurred;
1927 * 0 - balancing for higher levels needed;
1928 * -1 - no balancing for higher levels needed;
1929 * -2 - no disk space.
1930 */
1931 static int check_balance (int mode,
1932 struct tree_balance * tb,
1933 int h,
1934 int inum,
1935 int pos_in_item,
1936 struct item_head * ins_ih,
1937 const void * data
1938 )
1939 {
1940 struct virtual_node * vn;
1941
1942 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1943 vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1944 vn->vn_mode = mode;
1945 vn->vn_affected_item_num = inum;
1946 vn->vn_pos_in_item = pos_in_item;
1947 vn->vn_ins_ih = ins_ih;
1948 vn->vn_data = data;
1949
1950 #ifdef CONFIG_REISERFS_CHECK
1951 if (mode == M_INSERT && !vn->vn_ins_ih)
1952 reiserfs_panic (0, "vs-8255: check_balance: ins_ih can not be 0 in insert mode");
1953 #endif
1954
1955 if ( tb->insert_size[h] > 0 )
1956 /* Calculate balance parameters when size of node is increasing. */
1957 return ip_check_balance (tb, h);
1958
1959 /* Calculate balance parameters when size of node is decreasing. */
1960 return dc_check_balance (tb, h);
1961 }
1962
1963
1964
1965 /* Check whether parent at the path is the really parent of the current node.*/
1966 static int get_direct_parent(
1967 struct tree_balance * p_s_tb,
1968 int n_h
1969 ) {
1970 struct buffer_head * p_s_bh;
1971 struct path * p_s_path = p_s_tb->tb_path;
1972 int n_position,
1973 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1974
1975 /* We are in the root or in the new root. */
1976 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {
1977
1978 #ifdef CONFIG_REISERFS_CHECK
1979 if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1 )
1980 reiserfs_panic(p_s_tb->tb_sb, "PAP-8260: get_direct_parent: illegal offset in the path");
1981 #endif
1982
1983 if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1984 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
1985 /* Root is not changed. */
1986 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1987 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1988 return CARRY_ON;
1989 }
1990 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1991 }
1992
1993 if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) )
1994 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1995
1996 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) )
1997 return REPEAT_SEARCH;
1998
1999 if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr )
2000 /* Parent in the path is not parent of the current node in the tree. */
2001 return REPEAT_SEARCH;
2002
2003 if ( buffer_locked(p_s_bh) ) {
2004 __wait_on_buffer(p_s_bh);
2005 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2006 return REPEAT_SEARCH;
2007 }
2008
2009 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
2010 }
2011
2012
2013 /* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
2014 * of S[n_h] we
2015 * need in order to balance S[n_h], and get them if necessary.
2016 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
2017 * CARRY_ON - schedule didn't occur while the function worked;
2018 */
2019 static int get_neighbors(
2020 struct tree_balance * p_s_tb,
2021 int n_h
2022 ) {
2023 int n_child_position,
2024 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
2025 unsigned long n_son_number;
2026 struct super_block * p_s_sb = p_s_tb->tb_sb;
2027 struct buffer_head * p_s_bh;
2028
2029
2030 if ( p_s_tb->lnum[n_h] ) {
2031 /* We need left neighbor to balance S[n_h]. */
2032 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
2033
2034 #ifdef CONFIG_REISERFS_CHECK
2035 if ( p_s_bh == p_s_tb->FL[n_h] && ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) )
2036 reiserfs_panic (p_s_tb->tb_sb, "PAP-8270: get_neighbors: invalid position in the parent");
2037 #endif
2038
2039 n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]);
2040 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
2041 p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize);
2042 if (!p_s_bh)
2043 return IO_ERROR;
2044 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
2045 decrement_bcount(p_s_bh);
2046 return REPEAT_SEARCH;
2047 }
2048
2049 #ifdef CONFIG_REISERFS_CHECK
2050 if ( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) || n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
2051 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != p_s_bh->b_blocknr )
2052 reiserfs_panic (p_s_tb->tb_sb, "PAP-8275: get_neighbors: invalid parent");
2053 if ( ! B_IS_IN_TREE(p_s_bh) )
2054 reiserfs_panic (p_s_tb->tb_sb, "PAP-8280: get_neighbors: invalid child");
2055
2056 if (! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FL[0],n_child_position)->dc_size)
2057 reiserfs_panic (p_s_tb->tb_sb, "PAP-8290: get_neighbors: invalid child size of left neighbor");
2058 #endif
2059
2060 decrement_bcount(p_s_tb->L[n_h]);
2061 p_s_tb->L[n_h] = p_s_bh;
2062 }
2063
2064
2065 if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */
2066 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
2067
2068 #ifdef CONFIG_REISERFS_CHECK
2069 if ( p_s_bh == p_s_tb->FR[n_h] && PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh) )
2070 reiserfs_panic (p_s_tb->tb_sb, "PAP-8295: get_neighbors: invalid position in the parent");
2071 #endif
2072
2073 n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0;
2074 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
2075 p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize);
2076 if (!p_s_bh)
2077 return IO_ERROR;
2078 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
2079 decrement_bcount(p_s_bh);
2080 return REPEAT_SEARCH;
2081 }
2082 decrement_bcount(p_s_tb->R[n_h]);
2083 p_s_tb->R[n_h] = p_s_bh;
2084
2085 #ifdef CONFIG_REISERFS_CHECK
2086 if (! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size) {
2087 reiserfs_panic (p_s_tb->tb_sb, "PAP-8300: get_neighbors: invalid child size of right neighbor (%d != %d - %d)",
2088 B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh), B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size);
2089 }
2090 #endif
2091
2092 }
2093 return CARRY_ON;
2094 }
2095
2096
2097 void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s)
2098 {
2099 void * vp;
2100 static size_t malloced;
2101
2102
2103 vp = kmalloc (size, flags);
2104 if (vp) {
2105 s->u.reiserfs_sb.s_kmallocs += size;
2106 if (s->u.reiserfs_sb.s_kmallocs > malloced + 200000) {
2107 reiserfs_warning ("vs-8301: reiserfs_kmalloc: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs);
2108 malloced = s->u.reiserfs_sb.s_kmallocs;
2109 }
2110 }
2111 /*printk ("malloc : size %d, allocated %d\n", size, s->u.reiserfs_sb.s_kmallocs);*/
2112 return vp;
2113 }
2114
2115 void reiserfs_kfree (const void * vp, size_t size, struct super_block * s)
2116 {
2117 kfree (vp);
2118
2119 s->u.reiserfs_sb.s_kmallocs -= size;
2120 if (s->u.reiserfs_sb.s_kmallocs < 0)
2121 reiserfs_warning ("vs-8302: reiserfs_kfree: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs);
2122
2123 }
2124
2125
2126 static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh)
2127 {
2128 // int size = sizeof (struct virtual_item); /* for new item in case of insert */
2129 // int i, nr_items;
2130 // struct item_head * ih;
2131
2132 // this is enough for _ALL_ currently possible cases. In 4 k block
2133 // one may put < 170 empty items. Each virtual item eats 12
2134 // byte. The biggest direntry item may have < 256 entries. Each
2135 // entry would eat 2 byte of virtual node space
2136 return sb->s_blocksize;
2137
2138 #if 0
2139 size = sizeof (struct virtual_node) + sizeof (struct virtual_item);
2140 ih = B_N_PITEM_HEAD (bh, 0);
2141 nr_items = B_NR_ITEMS (bh);
2142 for (i = 0; i < nr_items; i ++, ih ++) {
2143 /* each item occupies some space in virtual node */
2144 size += sizeof (struct virtual_item);
2145 if (is_direntry_le_ih (ih))
2146 /* each entry and new one occupeis 2 byte in the virtual node */
2147 size += (le16_to_cpu (ih->u.ih_entry_count) + 1) * sizeof (__u16);
2148 }
2149
2150 /* 1 bit for each bitmap block to note whether bitmap block was
2151 dirtied in the operation */
2152 /* size += (SB_BMAP_NR (sb) * 2 / 8 + 4);*/
2153 return size;
2154 #endif
2155 }
2156
2157
2158
2159 /* maybe we should fail balancing we are going to perform when kmalloc
2160 fails several times. But now it will loop until kmalloc gets
2161 required memory */
2162 static int get_mem_for_virtual_node (struct tree_balance * tb)
2163 {
2164 int check_fs = 0;
2165 int size;
2166 char * buf;
2167
2168 size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path));
2169
2170 if (size > tb->vn_buf_size) {
2171 /* we have to allocate more memory for virtual node */
2172 if (tb->vn_buf) {
2173 /* free memory allocated before */
2174 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2175 /* this is not needed if kfree is atomic */
2176 check_fs = 1;
2177 }
2178
2179 /* virtual node requires now more memory */
2180 tb->vn_buf_size = size;
2181
2182 /* get memory for virtual item */
2183 buf = reiserfs_kmalloc(size, GFP_ATOMIC, tb->tb_sb);
2184 if ( ! buf ) {
2185 /* getting memory with GFP_KERNEL priority may involve
2186 balancing now (due to indirect_to_direct conversion on
2187 dcache shrinking). So, release path and collected
2188 resourses here */
2189 free_buffers_in_tb (tb);
2190 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
2191 if ( !buf ) {
2192 #ifdef CONFIG_REISERFS_CHECK
2193 reiserfs_warning ("vs-8345: get_mem_for_virtual_node: "
2194 "kmalloc failed. reiserfs kmalloced %d bytes\n",
2195 tb->tb_sb->u.reiserfs_sb.s_kmallocs);
2196 #endif
2197 tb->vn_buf_size = 0;
2198 }
2199 tb->vn_buf = buf;
2200 schedule() ;
2201 return REPEAT_SEARCH;
2202 }
2203
2204 tb->vn_buf = buf;
2205 }
2206
2207 if ( check_fs && FILESYSTEM_CHANGED_TB (tb) )
2208 return REPEAT_SEARCH;
2209
2210 return CARRY_ON;
2211 }
2212
2213
2214 #ifdef CONFIG_REISERFS_CHECK
2215 static void tb_buffer_sanity_check (struct super_block * p_s_sb,
2216 struct buffer_head * p_s_bh,
2217 const char *descr, int level) {
2218 if (p_s_bh) {
2219 if (atomic_read (&(p_s_bh->b_count)) <= 0) {
2220
2221 reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2222 }
2223
2224 if ( ! buffer_uptodate (p_s_bh) ) {
2225 reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh);
2226 }
2227
2228 if ( ! B_IS_IN_TREE (p_s_bh) ) {
2229 reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh);
2230 }
2231
2232 if (p_s_bh->b_dev != p_s_sb->s_dev ||
2233 p_s_bh->b_size != p_s_sb->s_blocksize ||
2234 p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2235 reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): check failed for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2236 }
2237 }
2238 }
2239 #endif
2240
2241 static void clear_all_dirty_bits(struct super_block *s,
2242 struct buffer_head *bh) {
2243 reiserfs_prepare_for_journal(s, bh, 0) ;
2244 }
2245
2246 static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
2247 {
2248 struct buffer_head * locked;
2249 #ifdef CONFIG_REISERFS_CHECK
2250 int repeat_counter = 0;
2251 #endif
2252 int i;
2253
2254 do {
2255
2256 locked = NULL;
2257
2258 for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) {
2259 if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) {
2260 /* if I understand correctly, we can only be sure the last buffer
2261 ** in the path is in the tree --clm
2262 */
2263 #ifdef CONFIG_REISERFS_CHECK
2264 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2265 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2266 tb_buffer_sanity_check (p_s_tb->tb_sb,
2267 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i),
2268 "S",
2269 p_s_tb->tb_path->path_length - i);
2270 }
2271 #endif
2272 clear_all_dirty_bits(p_s_tb->tb_sb,
2273 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ;
2274
2275 if ( buffer_locked (PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) )
2276 locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i);
2277 }
2278 }
2279
2280 for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) {
2281
2282 if (p_s_tb->lnum[i] ) {
2283
2284 if ( p_s_tb->L[i] ) {
2285 #ifdef CONFIG_REISERFS_CHECK
2286 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i);
2287 #endif
2288 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i]) ;
2289 if ( buffer_locked (p_s_tb->L[i]) )
2290 locked = p_s_tb->L[i];
2291 }
2292
2293 if ( !locked && p_s_tb->FL[i] ) {
2294 #ifdef CONFIG_REISERFS_CHECK
2295 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i);
2296 #endif
2297 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i]) ;
2298 if ( buffer_locked (p_s_tb->FL[i]) )
2299 locked = p_s_tb->FL[i];
2300 }
2301
2302 if ( !locked && p_s_tb->CFL[i] ) {
2303 #ifdef CONFIG_REISERFS_CHECK
2304 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i);
2305 #endif
2306 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i]) ;
2307 if ( buffer_locked (p_s_tb->CFL[i]) )
2308 locked = p_s_tb->CFL[i];
2309 }
2310
2311 }
2312
2313 if ( !locked && (p_s_tb->rnum[i]) ) {
2314
2315 if ( p_s_tb->R[i] ) {
2316 #ifdef CONFIG_REISERFS_CHECK
2317 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i);
2318 #endif
2319 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]) ;
2320 if ( buffer_locked (p_s_tb->R[i]) )
2321 locked = p_s_tb->R[i];
2322 }
2323
2324
2325 if ( !locked && p_s_tb->FR[i] ) {
2326 #ifdef CONFIG_REISERFS_CHECK
2327 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i);
2328 #endif
2329 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i]) ;
2330 if ( buffer_locked (p_s_tb->FR[i]) )
2331 locked = p_s_tb->FR[i];
2332 }
2333
2334 if ( !locked && p_s_tb->CFR[i] ) {
2335 #ifdef CONFIG_REISERFS_CHECK
2336 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i);
2337 #endif
2338 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i]) ;
2339 if ( buffer_locked (p_s_tb->CFR[i]) )
2340 locked = p_s_tb->CFR[i];
2341 }
2342 }
2343 }
2344 /* as far as I can tell, this is not required. The FEB list seems
2345 ** to be full of newly allocated nodes, which will never be locked,
2346 ** dirty, or anything else.
2347 ** To be safe, I'm putting in the checks and waits in. For the moment,
2348 ** they are needed to keep the code in journal.c from complaining
2349 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2350 ** --clm
2351 */
2352 for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) {
2353 if ( p_s_tb->FEB[i] ) {
2354 clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]) ;
2355 if (buffer_locked(p_s_tb->FEB[i])) {
2356 locked = p_s_tb->FEB[i] ;
2357 }
2358 }
2359 }
2360
2361 if (locked) {
2362 #ifdef CONFIG_REISERFS_CHECK
2363 repeat_counter++;
2364 if ( (repeat_counter % 10000) == 0) {
2365 reiserfs_warning ("wait_tb_buffers_until_released(): too many iterations waiting for buffer to unlock (%b)\n", locked);
2366
2367 /* Don't loop forever. Try to recover from possible error. */
2368
2369 return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON;
2370 }
2371 #endif
2372 __wait_on_buffer (locked);
2373 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
2374 return REPEAT_SEARCH;
2375 }
2376 }
2377
2378 } while (locked);
2379
2380 return CARRY_ON;
2381 }
2382
2383
2384 /* Prepare for balancing, that is
2385 * get all necessary parents, and neighbors;
2386 * analyze what and where should be moved;
2387 * get sufficient number of new nodes;
2388 * Balancing will start only after all resources will be collected at a time.
2389 *
2390 * When ported to SMP kernels, only at the last moment after all needed nodes
2391 * are collected in cache, will the resources be locked using the usual
2392 * textbook ordered lock acquisition algorithms. Note that ensuring that
2393 * this code neither write locks what it does not need to write lock nor locks out of order
2394 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans
2395 *
2396 * fix is meant in the sense of render unchanging
2397 *
2398 * Latency might be improved by first gathering a list of what buffers are needed
2399 * and then getting as many of them in parallel as possible? -Hans
2400 *
2401 * Parameters:
2402 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2403 * tb tree_balance structure;
2404 * inum item number in S[h];
2405 * pos_in_item - comment this if you can
2406 * ins_ih & ins_sd are used when inserting
2407 * Returns: 1 - schedule occurred while the function worked;
2408 * 0 - schedule didn't occur while the function worked;
2409 * -1 - if no_disk_space
2410 */
2411
2412
2413 int fix_nodes (int n_op_mode,
2414 struct tree_balance * p_s_tb,
2415 struct item_head * p_s_ins_ih, // item head of item being inserted
2416 const void * data // inserted item or data to be pasted
2417 ) {
2418 int n_ret_value,
2419 n_h,
2420 n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2421 int n_pos_in_item;
2422
2423 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2424 ** during wait_tb_buffers_run
2425 */
2426 int wait_tb_buffers_run = 0 ;
2427 int windex ;
2428 struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2429
2430 n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2431
2432
2433 p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb);
2434
2435 /* we prepare and log the super here so it will already be in the
2436 ** transaction when do_balance needs to change it.
2437 ** This way do_balance won't have to schedule when trying to prepare
2438 ** the super for logging
2439 */
2440 reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2441 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ;
2442 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2443 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ;
2444 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2445 return REPEAT_SEARCH;
2446
2447 /* if it possible in indirect_to_direct conversion */
2448 if (buffer_locked (p_s_tbS0)) {
2449 __wait_on_buffer (p_s_tbS0);
2450 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2451 return REPEAT_SEARCH;
2452 }
2453
2454 #ifdef CONFIG_REISERFS_CHECK
2455 if ( cur_tb ) {
2456 print_cur_tb ("fix_nodes");
2457 reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance");
2458 }
2459
2460 if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) {
2461 reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2462 "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode);
2463 }
2464
2465 // FIXME: new items have to be of 8 byte multiples. Including new
2466 // directory items those look like old ones
2467 /*
2468 if (p_s_tb->insert_size[0] % 8)
2469 reiserfs_panic (p_s_tb->tb_sb, "vs-: fix_nodes: incorrect insert_size %d, "
2470 "mode %c",
2471 p_s_tb->insert_size[0], n_op_mode);
2472 */
2473
2474 /* Check parameters. */
2475 switch (n_op_mode) {
2476 case M_INSERT:
2477 if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) )
2478 reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2479 n_item_num, B_NR_ITEMS(p_s_tbS0));
2480 break;
2481 case M_PASTE:
2482 case M_DELETE:
2483 case M_CUT:
2484 if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) {
2485 print_block (p_s_tbS0, 0, -1, -1);
2486 printk("mode = %c insert_size = %d\n", n_op_mode, p_s_tb->insert_size[0]);
2487 reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d)", n_item_num);
2488 }
2489 break;
2490 default:
2491 reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation");
2492 }
2493 #endif
2494
2495 if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH)
2496 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2497 return REPEAT_SEARCH;
2498
2499
2500 /* Starting from the leaf level; for all levels n_h of the tree. */
2501 for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) {
2502 if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) {
2503 goto repeat;
2504 return n_ret_value;
2505 }
2506
2507 if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num,
2508 n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) {
2509 if ( n_ret_value == NO_BALANCING_NEEDED ) {
2510 /* No balancing for higher levels needed. */
2511 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) {
2512 goto repeat;
2513 return n_ret_value;
2514 }
2515 if ( n_h != MAX_HEIGHT - 1 )
2516 p_s_tb->insert_size[n_h + 1] = 0;
2517 /* ok, analysis and resource gathering are complete */
2518 break;
2519 }
2520 goto repeat;
2521 return n_ret_value;
2522 }
2523
2524 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) {
2525 goto repeat;
2526 return n_ret_value;
2527 }
2528
2529 if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) {
2530 goto repeat;
2531 return n_ret_value; /* No disk space, or schedule occurred and
2532 analysis may be invalid and needs to be redone. */
2533 }
2534
2535 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) {
2536 /* We have a positive insert size but no nodes exist on this
2537 level, this means that we are creating a new root. */
2538
2539 #ifdef CONFIG_REISERFS_CHECK
2540 if ( p_s_tb->blknum[n_h] != 1 )
2541 reiserfs_panic(p_s_tb->tb_sb,"PAP-8350: fix_nodes: creating new empty root");
2542 #endif /* CONFIG_REISERFS_CHECK */
2543
2544 if ( n_h < MAX_HEIGHT - 1 )
2545 p_s_tb->insert_size[n_h + 1] = 0;
2546 }
2547 else
2548 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) {
2549 if ( p_s_tb->blknum[n_h] > 1 ) {
2550 /* The tree needs to be grown, so this node S[n_h]
2551 which is the root node is split into two nodes,
2552 and a new node (S[n_h+1]) will be created to
2553 become the root node. */
2554
2555 #ifdef CONFIG_REISERFS_CHECK
2556 if ( n_h == MAX_HEIGHT - 1 )
2557 reiserfs_panic(p_s_tb->tb_sb, "PAP-8355: fix_nodes: attempt to create too high of a tree");
2558 #endif /* CONFIG_REISERFS_CHECK */
2559
2560 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE;
2561 }
2562 else
2563 if ( n_h < MAX_HEIGHT - 1 )
2564 p_s_tb->insert_size[n_h + 1] = 0;
2565 }
2566 else
2567 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
2568 }
2569
2570
2571 windex = push_journal_writer("fix_nodes") ;
2572 if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) {
2573 pop_journal_writer(windex) ;
2574 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2575 wait_tb_buffers_run = 1 ;
2576 n_ret_value = REPEAT_SEARCH ;
2577 goto repeat;
2578 } else {
2579 return CARRY_ON;
2580 }
2581 } else {
2582 wait_tb_buffers_run = 1 ;
2583 pop_journal_writer(windex) ;
2584 goto repeat;
2585 }
2586
2587 repeat:
2588 // fix_nodes was unable to perform its calculation due to
2589 // filesystem got changed under us, lack of free disk space or i/o
2590 // failure. If the first is the case - the search will be
2591 // repeated. For now - free all resources acquired so far except
2592 // for the new allocated nodes
2593 {
2594 int i;
2595
2596 /* Release path buffers. */
2597 if (wait_tb_buffers_run) {
2598 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ;
2599 } else {
2600 pathrelse (p_s_tb->tb_path);
2601 }
2602 /* brelse all resources collected for balancing */
2603 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2604 if (wait_tb_buffers_run) {
2605 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]);
2606 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]);
2607 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]);
2608 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]);
2609 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]);
2610 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]);
2611 }
2612
2613 brelse (p_s_tb->L[i]);p_s_tb->L[i] = 0;
2614 brelse (p_s_tb->R[i]);p_s_tb->R[i] = 0;
2615 brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = 0;
2616 brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = 0;
2617 brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = 0;
2618 brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = 0;
2619 }
2620
2621 if (wait_tb_buffers_run) {
2622 for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
2623 if ( p_s_tb->FEB[i] ) {
2624 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2625 p_s_tb->FEB[i]) ;
2626 }
2627 }
2628 }
2629 return n_ret_value;
2630 }
2631
2632 }
2633
2634
2635 /* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2636 wanted to make lines shorter */
2637 void unfix_nodes (struct tree_balance * tb)
2638 {
2639 int i;
2640
2641 /* Release path buffers. */
2642 pathrelse_and_restore (tb->tb_sb, tb->tb_path);
2643
2644 /* brelse all resources collected for balancing */
2645 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2646 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]);
2647 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]);
2648 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]);
2649 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]);
2650 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]);
2651 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]);
2652
2653 brelse (tb->L[i]);
2654 brelse (tb->R[i]);
2655 brelse (tb->FL[i]);
2656 brelse (tb->FR[i]);
2657 brelse (tb->CFL[i]);
2658 brelse (tb->CFR[i]);
2659 }
2660
2661 /* deal with list of allocated (used and unused) nodes */
2662 for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
2663 if ( tb->FEB[i] ) {
2664 unsigned long blocknr = tb->FEB[i]->b_blocknr ;
2665 /* de-allocated block which was not used by balancing and
2666 bforget about buffer for it */
2667 brelse (tb->FEB[i]);
2668 reiserfs_free_block (tb->transaction_handle, blocknr);
2669 }
2670 if (tb->used[i]) {
2671 /* release used as new nodes including a new root */
2672 brelse (tb->used[i]);
2673 }
2674 }
2675 if (tb->vn_buf)
2676 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2677
2678 }
2679
2680
2681
2682