File: /usr/src/linux/fs/hfs/binsert.c

1     /*
2      * linux/fs/hfs/binsert.c
3      *
4      * Copyright (C) 1995-1997  Paul H. Hargrove
5      * This file may be distributed under the terms of the GNU General Public License.
6      *
7      * This file contains the code to insert records in a B-tree.
8      *
9      * "XXX" in a comment is a note to myself to consider changing something.
10      *
11      * In function preconditions the term "valid" applied to a pointer to
12      * a structure means that the pointer is non-NULL and the structure it
13      * points to has all fields initialized to consistent values.
14      */
15     
16     #include "hfs_btree.h"
17     
18     /*================ File-local functions ================*/
19     
20     /* btree locking functions */
21     static inline void hfs_btree_lock(struct hfs_btree *tree)
22     {
23       while (tree->lock) 
24         hfs_sleep_on(&tree->wait);
25       tree->lock = 1;
26     }
27     
28     static inline void hfs_btree_unlock(struct hfs_btree *tree)
29     {
30       tree->lock = 0;
31       hfs_wake_up(&tree->wait);
32     }
33     
34     /*
35      * binsert_nonfull()
36      *
37      * Description:
38      *   Inserts a record in a given bnode known to have sufficient space.
39      * Input Variable(s):
40      *   struct hfs_brec* brec: pointer to the brec for the insertion
41      *   struct hfs_belem* belem: the element in the search path to insert in
42      *   struct hfs_bkey* key: pointer to the key for the record to insert
43      *   void* data: pointer to the record to insert
44      *   hfs_u16 keysize: size of the key to insert
45      *   hfs_u16 datasize: size of the record to insert
46      * Output Variable(s):
47      *   NONE
48      * Returns:
49      *   NONE
50      * Preconditions:
51      *   'brec' points to a valid (struct hfs_brec).
52      *   'belem' points to a valid (struct hfs_belem) in 'brec', the node
53      *    of which has enough free space to insert 'key' and 'data'.
54      *   'key' is a pointer to a valid (struct hfs_bkey) of length 'keysize'
55      *    which, in sorted order, belongs at the location indicated by 'brec'.
56      *   'data' is non-NULL an points to appropriate data of length 'datasize'
57      * Postconditions:
58      *   The record has been inserted in the position indicated by 'brec'.
59      */
60     static void binsert_nonfull(struct hfs_brec *brec, struct hfs_belem *belem,
61     			    const struct hfs_bkey *key, const void *data,
62     			    hfs_u8 keysize, hfs_u16 datasize)
63     {
64     	int i, rec, nrecs, size, tomove;
65     	hfs_u8 *start;
66     	struct hfs_bnode *bnode = belem->bnr.bn;
67     
68     	rec = ++(belem->record);
69     	size = ROUND(keysize+1) + datasize;
70     	nrecs = bnode->ndNRecs + 1;
71     	tomove = bnode_offset(bnode, nrecs) - bnode_offset(bnode, rec);
72     	
73     	/* adjust the record table */
74     	for (i = nrecs; i >= rec; --i) {
75     		hfs_put_hs(bnode_offset(bnode,i) + size, RECTBL(bnode,i+1));
76     	}
77     
78     	/* make room */
79     	start = bnode_key(bnode, rec);
80     	memmove(start + size, start, tomove);
81     
82     	/* copy in the key and the data*/
83     	*start = keysize;
84     	keysize = ROUND(keysize+1);
85     	memcpy(start + 1, (hfs_u8 *)key + 1, keysize-1);
86     	memcpy(start + keysize, data, datasize);
87     
88     	/* update record count */
89     	++bnode->ndNRecs;
90     }
91     
92     /*
93      * add_root()
94      *
95      * Description:
96      *   Adds a new root to a B*-tree, increasing its height.
97      * Input Variable(s):
98      *   struct hfs_btree *tree: the tree to add a new root to
99      *   struct hfs_bnode *left: the new root's first child or NULL
100      *   struct hfs_bnode *right: the new root's second child or NULL
101      * Output Variable(s):
102      *   NONE
103      * Returns:
104      *   void
105      * Preconditions:
106      *   'tree' points to a valid (struct hfs_btree).
107      *   'left' and 'right' point to valid (struct hfs_bnode)s, which
108      *    resulted from splitting the old root node, or are both NULL
109      *    if there was no root node before.
110      * Postconditions:
111      *   Upon success a new root node is added to 'tree' with either
112      *    two children ('left' and 'right') or none.
113      */
114     static void add_root(struct hfs_btree *tree,
115     		     struct hfs_bnode *left,
116     		     struct hfs_bnode *right)
117     {
118     	struct hfs_bnode_ref bnr;
119     	struct hfs_bnode *root;
120     	struct hfs_bkey *key;
121     	int keylen = tree->bthKeyLen;
122     
123     	if (left && !right) {
124     		hfs_warn("add_root: LEFT but no RIGHT\n");
125     		return;
126     	}
127     
128     	bnr = hfs_bnode_alloc(tree);
129     	if (!(root = bnr.bn)) {
130     		return;
131     	}
132     
133     	root->sticky = HFS_STICKY;
134     	tree->root = root;
135     	tree->bthRoot = root->node;
136     	++tree->bthDepth;
137     
138     	root->ndNHeight = tree->bthDepth;
139     	root->ndFLink = 0;
140     	root->ndBLink = 0;
141     
142     	if (!left) {
143     		/* tree was empty */
144     		root->ndType  = ndLeafNode;
145     		root->ndNRecs = 0;
146     
147     		tree->bthFNode = root->node;
148     		tree->bthLNode = root->node;
149     	} else {
150     		root->ndType  = ndIndxNode;
151     		root->ndNRecs = 2;
152     
153     		hfs_put_hs(sizeof(struct NodeDescriptor) + ROUND(1+keylen) +
154     			   sizeof(hfs_u32), RECTBL(root, 2));
155     		key = bnode_key(root, 1);
156     		key->KeyLen = keylen;
157     		memcpy(key->value,
158     		       ((struct hfs_bkey *)bnode_key(left, 1))->value, keylen);
159     		hfs_put_hl(left->node, bkey_record(key));
160     		
161     		hfs_put_hs(sizeof(struct NodeDescriptor) + 2*ROUND(1+keylen) +
162     			   2*sizeof(hfs_u32), RECTBL(root, 3));
163     		key = bnode_key(root, 2);
164     		key->KeyLen = keylen;
165     		memcpy(key->value,
166     		       ((struct hfs_bkey *)bnode_key(right, 1))->value, keylen);
167     		hfs_put_hl(right->node, bkey_record(key));
168     
169     		/* the former root (left) is now just a normal node */
170     		left->sticky = HFS_NOT_STICKY;
171     		if ((left->next = bhash(tree, left->node))) {
172     			left->next->prev = left;
173     		}
174     		bhash(tree, left->node) = left;
175     	}
176     	hfs_bnode_relse(&bnr);
177     	tree->dirt = 1;
178     }
179     
180     /*
181      * insert_empty_bnode()
182      *
183      * Description:
184      *   Adds an empty node to the right of 'left'.
185      * Input Variable(s):
186      *   struct hfs_btree *tree: the tree to add a node to
187      *   struct hfs_bnode *left: the node to add a node after
188      * Output Variable(s):
189      *   NONE
190      * Returns:
191      *   struct hfs_bnode_ref *: reference to the new bnode.
192      * Preconditions:
193      *   'tree' points to a valid (struct hfs_btree) with at least 1 free node.
194      *   'left' points to a valid (struct hfs_bnode) belonging to 'tree'.
195      * Postconditions:
196      *   If NULL is returned then 'tree' and 'left' are unchanged.
197      *   Otherwise a node with 0 records is inserted in the tree to the right
198      *   of the node 'left'.  The 'ndFLink' of 'left' and the 'ndBLink' of
199      *   the former right-neighbor of 'left' (if one existed) point to the
200      *   new node.	If 'left' had no right neighbor and is a leaf node the
201      *   the 'bthLNode' of 'tree' points to the new node.  The free-count and
202      *   bitmap for 'tree' are kept current by hfs_bnode_alloc() which supplies
203      *   the required node.
204      */
205     static struct hfs_bnode_ref insert_empty_bnode(struct hfs_btree *tree,
206     					       struct hfs_bnode *left)
207     {
208     	struct hfs_bnode_ref retval;
209     	struct hfs_bnode_ref right;
210     
211     	retval = hfs_bnode_alloc(tree);
212     	if (!retval.bn) {
213     		hfs_warn("hfs_binsert: out of bnodes?.\n");
214     		goto done;
215     	}
216     	retval.bn->sticky = HFS_NOT_STICKY;
217     	if ((retval.bn->next = bhash(tree, retval.bn->node))) {
218     		retval.bn->next->prev = retval.bn;
219     	}
220     	bhash(tree, retval.bn->node) = retval.bn;
221     
222     	if (left->ndFLink) {
223     		right = hfs_bnode_find(tree, left->ndFLink, HFS_LOCK_WRITE);
224     		if (!right.bn) {
225     			hfs_warn("hfs_binsert: corrupt btree.\n");
226     			hfs_bnode_bitop(tree, retval.bn->node, 0);
227     			hfs_bnode_relse(&retval);
228     			goto done;
229     		}
230     		right.bn->ndBLink = retval.bn->node;
231     		hfs_bnode_relse(&right);
232     	} else if (left->ndType == ndLeafNode) {
233     		tree->bthLNode = retval.bn->node;
234     		tree->dirt = 1;
235     	}
236     
237     	retval.bn->ndFLink   = left->ndFLink;
238     	retval.bn->ndBLink   = left->node;
239     	retval.bn->ndType    = left->ndType;
240     	retval.bn->ndNHeight = left->ndNHeight;
241     	retval.bn->ndNRecs   = 0;
242     
243     	left->ndFLink = retval.bn->node;
244     
245      done:
246     	return retval;
247     }
248     
249     /*
250      * split()
251      *
252      * Description:
253      *   Splits an over full node during insertion.
254      *   Picks the split point that results in the most-nearly equal
255      *   space usage in the new and old nodes.
256      * Input Variable(s):
257      *   struct hfs_belem *elem: the over full node.
258      *   int size: the number of bytes to be used by the new record and its key.
259      * Output Variable(s):
260      *   struct hfs_belem *elem: changed to indicate where the new record
261      *    should be inserted.
262      * Returns:
263      *   struct hfs_bnode_ref: reference to the new bnode.
264      * Preconditions:
265      *   'elem' points to a valid path element corresponding to the over full node.
266      *   'size' is positive.
267      * Postconditions:
268      *   The records in the node corresponding to 'elem' are redistributed across
269      *   the old and new nodes so that after inserting the new record, the space
270      *   usage in these two nodes is as equal as possible.
271      *   'elem' is updated so that a call to binsert_nonfull() will insert the
272      *   new record in the correct location.
273      */
274     static inline struct hfs_bnode_ref split(struct hfs_belem *elem, int size)
275     {
276     	struct hfs_bnode *bnode = elem->bnr.bn;
277     	int nrecs, cutoff, index, tmp, used, in_right;
278     	struct hfs_bnode_ref right;
279     
280     	right = insert_empty_bnode(bnode->tree, bnode);
281     	if (right.bn) {
282     		nrecs = bnode->ndNRecs;
283     		cutoff = (size + bnode_end(bnode) -
284     			      sizeof(struct NodeDescriptor) +
285     			      (nrecs+1)*sizeof(hfs_u16))/2;
286     		used = 0;
287     		in_right = 1;
288     		/* note that this only works because records sizes are even */
289     		for (index=1; index <= elem->record; ++index) {
290     			tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
291     			used += tmp;
292     			if (used > cutoff) {
293     				goto found;
294     			}
295     			used += tmp;
296     		}
297     		tmp = (size + sizeof(hfs_u16))/2;
298     		used += tmp;
299     		if (used > cutoff) {
300     			goto found;
301     		}
302     		in_right = 0;
303     		used += tmp;
304     		for (; index <= nrecs; ++index) {
305     			tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
306     			used += tmp;
307     			if (used > cutoff) {
308     				goto found;
309     			}
310     			used += tmp;
311     		}
312     		/* couldn't find the split point! */
313     		hfs_bnode_relse(&right);
314     	}
315     	return right;
316     
317     found:
318     	if (in_right) {
319     		elem->bnr = right;
320     		elem->record -= index-1;
321     	}
322     	hfs_bnode_shift_right(bnode, right.bn, index);
323     
324     	return right;
325     }
326     
327     /*
328      * binsert()
329      *
330      * Description:
331      *   Inserts a record in a tree known to have enough room, even if the
332      *   insertion requires the splitting of nodes.
333      * Input Variable(s):
334      *    struct hfs_brec *brec: partial path to the node to insert in
335      *    const struct hfs_bkey *key: key for the new record
336      *    const void *data: data for the new record
337      *    hfs_u8 keysize: size of the key
338      *    hfs_u16 datasize: size of the data
339      *    int reserve: number of nodes reserved in case of splits
340      * Output Variable(s):
341      *    *brec = NULL
342      * Returns:
343      *    int: 0 on success, error code on failure
344      * Preconditions:
345      *    'brec' points to a valid (struct hfs_brec) corresponding to a
346      *     record in a leaf node, after which a record is to be inserted,
347      *     or to "record 0" of the leaf node if the record is to be inserted
348      *     before all existing records in the node.	 The (struct hfs_brec)
349      *     includes all ancestors of the leaf node that are needed to
350      *     complete the insertion including the parents of any nodes that
351      *     will be split.
352      *    'key' points to a valid (struct hfs_bkey) which is appropriate
353      *     to this tree, and which belongs at the insertion point.
354      *    'data' points data appropriate for the indicated node.
355      *    'keysize' gives the size in bytes of the key.
356      *    'datasize' gives the size in bytes of the data.
357      *    'reserve' gives the number of nodes that have been reserved in the
358      *     tree to allow for splitting of nodes.
359      * Postconditions:
360      *    All 'reserve'd nodes have been either used or released.
361      *    *brec = NULL
362      *    On success the key and data have been inserted at the indicated
363      *    location in the tree, all appropriate fields of the in-core data
364      *    structures have been changed and updated versions of the on-disk
365      *    data structures have been scheduled for write-back to disk.
366      *    On failure the B*-tree is probably invalid both on disk and in-core.
367      *
368      *    XXX: Some attempt at repair might be made in the event of failure,
369      *    or the fs should be remounted read-only so things don't get worse.
370      */
371     static int binsert(struct hfs_brec *brec, const struct hfs_bkey *key,
372     		   const void *data, hfs_u8 keysize, hfs_u16 datasize,
373     		   int reserve)
374     {
375     	struct hfs_bnode_ref left, right, other;
376     	struct hfs_btree *tree = brec->tree;
377     	struct hfs_belem *belem = brec->bottom;
378     	int tmpsize = 1 + tree->bthKeyLen;
379     	struct hfs_bkey *tmpkey = hfs_malloc(tmpsize);
380     	hfs_u32 node;
381     	
382     	while ((belem >= brec->top) && (belem->flags & HFS_BPATH_OVERFLOW)) {
383     		left = belem->bnr;
384     		if (left.bn->ndFLink &&
385                         hfs_bnode_in_brec(left.bn->ndFLink, brec)) {
386     			hfs_warn("hfs_binsert: corrupt btree\n");
387     			tree->reserved -= reserve;
388     			hfs_free(tmpkey, tmpsize);
389     			return -EIO;
390     		}
391     			
392     		right = split(belem, ROUND(keysize+1) + ROUND(datasize));
393     		--reserve;
394     		--tree->reserved;
395     		if (!right.bn) {
396     			hfs_warn("hfs_binsert: unable to split node!\n");
397     			tree->reserved -= reserve;
398     			hfs_free(tmpkey, tmpsize);
399     			return -ENOSPC;
400     		}
401     		binsert_nonfull(brec, belem, key, data, keysize, datasize);
402     	
403     		if (belem->bnr.bn == left.bn) {
404     			other = right;
405     			if (belem->record == 1) {
406     				hfs_bnode_update_key(brec, belem, left.bn, 0);
407     			}
408     		} else {
409     			other = left;
410     		}
411     
412     		if (left.bn->node == tree->root->node) {
413     			add_root(tree, left.bn, right.bn);
414     			hfs_bnode_relse(&other);
415     			goto done;
416     		}
417     
418     		data = &node;
419     		datasize = sizeof(node);
420     		node = htonl(right.bn->node);
421     		key = tmpkey;
422     		keysize = tree->bthKeyLen;
423     		memcpy(tmpkey, bnode_key(right.bn, 1), keysize+1);
424     		hfs_bnode_relse(&other);
425     		
426     		--belem;
427     	}
428     
429     	if (belem < brec->top) {
430     		hfs_warn("hfs_binsert: Missing parent.\n");
431     		tree->reserved -= reserve;
432     		hfs_free(tmpkey, tmpsize);
433     		return -EIO;
434     	}
435     
436     	binsert_nonfull(brec, belem, key, data, keysize, datasize);
437     
438     done:
439     	tree->reserved -= reserve;
440     	hfs_free(tmpkey, tmpsize);
441     	return 0;
442     }
443     
444     /*================ Global functions ================*/
445     
446     /*
447      * hfs_binsert()
448      *
449      * Description:
450      *   This function inserts a new record into a b-tree.
451      * Input Variable(s):
452      *   struct hfs_btree *tree: pointer to the (struct hfs_btree) to insert in
453      *   struct hfs_bkey *key: pointer to the (struct hfs_bkey) to insert
454      *   void *data: pointer to the data to associate with 'key' in the b-tree
455      *   unsigned int datasize: the size of the data
456      * Output Variable(s):
457      *   NONE
458      * Returns:
459      *   int: 0 on success, error code on failure
460      * Preconditions:
461      *   'tree' points to a valid (struct hfs_btree)
462      *   'key' points to a valid (struct hfs_bkey)
463      *   'data' points to valid memory of length 'datasize'
464      * Postconditions:
465      *   If zero is returned then the record has been inserted in the
466      *    indicated location updating all in-core data structures and
467      *    scheduling all on-disk data structures for write-back.
468      */
469     int hfs_binsert(struct hfs_btree *tree, const struct hfs_bkey *key,
470     		const void *data, hfs_u16 datasize)
471     { 
472     	struct hfs_brec brec;
473     	struct hfs_belem *belem;
474     	int err, reserve, retval;
475     	hfs_u8 keysize;
476     
477     	if (!tree || (tree->magic != HFS_BTREE_MAGIC) || !key || !data) {
478     		hfs_warn("hfs_binsert: invalid arguments.\n");
479     		return -EINVAL;
480     	}
481     
482     	if (key->KeyLen > tree->bthKeyLen) {
483     		hfs_warn("hfs_binsert: oversized key\n");
484     		return -EINVAL;
485     	}
486     
487     restart:
488     	if (!tree->bthNRecs) {
489     		/* create the root bnode */
490     		add_root(tree, NULL, NULL);
491     		if (!hfs_brec_init(&brec, tree, HFS_BFIND_INSERT)) {
492     			hfs_warn("hfs_binsert: failed to create root.\n");
493     			return -ENOSPC;
494     		}
495     	} else {
496     		err = hfs_bfind(&brec, tree, key, HFS_BFIND_INSERT);
497     		if (err < 0) {
498     			hfs_warn("hfs_binsert: hfs_brec_find failed.\n");
499     			return err;
500     		} else if (err == 0) {
501     			hfs_brec_relse(&brec, NULL);
502     			return -EEXIST;
503     		}
504     	}
505     
506     	keysize = key->KeyLen;
507     	datasize = ROUND(datasize);
508     	belem = brec.bottom;
509     	belem->flags = 0;
510     	if (bnode_freespace(belem->bnr.bn) <
511     			    (sizeof(hfs_u16) + ROUND(keysize+1) + datasize)) {
512     		belem->flags |= HFS_BPATH_OVERFLOW;
513     	}
514     	if (belem->record == 0) {
515     		belem->flags |= HFS_BPATH_FIRST;
516     	}
517     
518     	if (!belem->flags) {
519     		hfs_brec_lock(&brec, brec.bottom);
520     		reserve = 0;
521     	} else {
522     		reserve = brec.bottom - brec.top;
523     		if (brec.top == 0) {
524     			++reserve;
525     		}
526     		/* make certain we have enough nodes to proceed */
527     		if ((tree->bthFree - tree->reserved) < reserve) {
528     			hfs_brec_relse(&brec, NULL);
529     			hfs_btree_lock(tree);
530     			if ((tree->bthFree - tree->reserved) < reserve) {
531     				hfs_btree_extend(tree);
532     			}
533     			hfs_btree_unlock(tree);
534     			if ((tree->bthFree - tree->reserved) < reserve) {
535     				return -ENOSPC;
536     			} else {
537     				goto restart;
538     			}
539     		}
540     		tree->reserved += reserve;
541     		hfs_brec_lock(&brec, NULL);
542     	}
543     
544     	retval = binsert(&brec, key, data, keysize, datasize, reserve);
545     	hfs_brec_relse(&brec, NULL);
546     	if (!retval) {
547     		++tree->bthNRecs;
548     		tree->dirt = 1;
549     	}
550     	return retval;
551     }
552