File: /usr/src/linux/fs/ntfs/inode.c
1 /*
2 * inode.c
3 *
4 * Copyright (C) 1995-1999 Martin von Löwis
5 * Copyright (C) 1996 Albert D. Cahalan
6 * Copyright (C) 1996-1997 Régis Duchesne
7 * Copyright (C) 1998 Joseph Malicki
8 * Copyright (C) 1999 Steve Dodd
9 * Copyright (C) 2000-2001 Anton Altaparmakov (AIA)
10 */
11 #include "ntfstypes.h"
12 #include "ntfsendian.h"
13 #include "struct.h"
14 #include "inode.h"
15 #include <linux/errno.h>
16 #include "macros.h"
17 #include "attr.h"
18 #include "super.h"
19 #include "dir.h"
20 #include "support.h"
21 #include "util.h"
22 #include <linux/ntfs_fs.h>
23 #include <linux/smp_lock.h>
24
/* One mft record staged in memory while an inode is laid out for disk. */
typedef struct {
	/* Mft record number; layout_inode() stores -1 for records that have
	 * no number assigned yet. */
	int recno;
	/* Raw record buffer (allocated by allocate_store()). */
	unsigned char *record;
} ntfs_mft_record;

/* The set of mft record buffers that together hold one inode on disk. */
typedef struct {
	/* Number of slots allocated in @records. */
	int size;
	/* Number of slots in @records whose buffer has been allocated. */
	int count;
	/* Array of @size slots. */
	ntfs_mft_record *records;
} ntfs_disk_inode;
35
36 static void ntfs_fill_mft_header(ntfs_u8 *mft, int rec_size, int seq_no,
37 int links, int flags)
38 {
39 int fixup_ofs = 0x2a;
40 int fixup_cnt = rec_size / NTFS_SECTOR_SIZE + 1;
41 int attr_ofs = (fixup_ofs + 2 * fixup_cnt + 7) & ~7;
42
43 NTFS_PUTU32(mft + 0x00, 0x454c4946); /* FILE */
44 NTFS_PUTU16(mft + 0x04, fixup_ofs); /* Offset to fixup. */
45 NTFS_PUTU16(mft + 0x06, fixup_cnt); /* Number of fixups. */
46 NTFS_PUTU64(mft + 0x08, 0); /* Logical sequence number. */
47 NTFS_PUTU16(mft + 0x10, seq_no); /* Sequence number. */
48 NTFS_PUTU16(mft + 0x12, links); /* Hard link count. */
49 NTFS_PUTU16(mft + 0x14, attr_ofs); /* Offset to attributes. */
50 NTFS_PUTU16(mft + 0x16, flags); /* Flags: 1 = In use,
51 2 = Directory. */
52 NTFS_PUTU32(mft + 0x18, attr_ofs + 8); /* Bytes in use. */
53 NTFS_PUTU32(mft + 0x1c, rec_size); /* Total allocated size. */
54 NTFS_PUTU64(mft + 0x20, 0); /* Base mft record. */
55 NTFS_PUTU16(mft + 0x28, 0); /* Next attr instance. */
56 NTFS_PUTU16(mft + fixup_ofs, 1); /* Fixup word. */
57 NTFS_PUTU32(mft + attr_ofs, (__u32)-1); /* End of attributes marker. */
58 }
59
60 /*
61 * Search in an inode an attribute by type and name.
62 * FIXME: Check that when attributes are inserted all attribute list
63 * attributes are expanded otherwise need to modify this function to deal
64 * with attribute lists. (AIA)
65 */
66 ntfs_attribute *ntfs_find_attr(ntfs_inode *ino, int type, char *name)
67 {
68 int i;
69
70 if (!ino) {
71 ntfs_error("ntfs_find_attr: NO INODE!\n");
72 return 0;
73 }
74 for (i = 0; i < ino->attr_count; i++) {
75 if (type < ino->attrs[i].type)
76 return 0;
77 if (type == ino->attrs[i].type) {
78 if (!name) {
79 if (!ino->attrs[i].name)
80 return ino->attrs + i;
81 } else if (ino->attrs[i].name &&
82 !ntfs_ua_strncmp(ino->attrs[i].name, name,
83 strlen(name)))
84 return ino->attrs + i;
85 }
86 }
87 return 0;
88 }
89
90 /*
91 * Insert all attributes from the record mftno of the MFT in the inode ino.
92 * If mftno is a base mft record we abort as soon as we find the attribute
93 * list, but only on the first pass. We will get called later when the attribute
94 * list attribute is being parsed so we need to distinguish the two cases.
95 * FIXME: We should be performing structural consistency checks. (AIA)
96 * Return 0 on success or -errno on error.
97 */
98 static int ntfs_insert_mft_attributes(ntfs_inode* ino, char *mft, int mftno)
99 {
100 int i, error, type, len, present = 0;
101 char *it;
102
103 /* Check for duplicate extension record. */
104 for(i = 0; i < ino->record_count; i++)
105 if (ino->records[i] == mftno) {
106 if (i)
107 return 0;
108 present = 1;
109 break;
110 }
111 if (!present) {
112 /* (re-)allocate space if necessary. */
113 if (ino->record_count % 8 == 0) {
114 int *new;
115
116 new = ntfs_malloc((ino->record_count + 8) *
117 sizeof(int));
118 if (!new)
119 return -ENOMEM;
120 if (ino->records) {
121 for (i = 0; i < ino->record_count; i++)
122 new[i] = ino->records[i];
123 ntfs_free(ino->records);
124 }
125 ino->records = new;
126 }
127 ino->records[ino->record_count] = mftno;
128 ino->record_count++;
129 }
130 it = mft + NTFS_GETU16(mft + 0x14); /* mft->attrs_offset */
131 do {
132 type = NTFS_GETU32(it);
133 len = NTFS_GETU32(it + 4);
134 if (type != -1) {
135 error = ntfs_insert_attribute(ino, it);
136 if (error)
137 return error;
138 }
139 /* If we have just processed the attribute list and this is
140 * the first time we are parsing this (base) mft record then we
141 * are done so that the attribute list gets parsed before the
142 * entries in the base mft record. Otherwise we run into
143 * problems with encountering attributes out of order and when
144 * this happens with different attribute extents we die. )-:
145 * This way we are ok as the attribute list is always sorted
146 * fully and correctly. (-: */
147 if (type == 0x20 && !present)
148 return 0;
149 it += len;
150 } while (type != -1); /* Attribute listing ends with type -1. */
151 return 0;
152 }
153
154 /*
155 * Insert a single specific attribute from the record mftno of the MFT in the
156 * inode ino. We disregard the attribute list assuming we have already parsed
157 * it.
158 * FIXME: We should be performing structural consistency checks. (AIA)
159 * Return 0 on success or -errno on error.
160 */
161 static int ntfs_insert_mft_attribute(ntfs_inode* ino, int mftno,
162 ntfs_u8 *attr)
163 {
164 int i, error, present = 0;
165
166 /* Check for duplicate extension record. */
167 for(i = 0; i < ino->record_count; i++)
168 if (ino->records[i] == mftno) {
169 present = 1;
170 break;
171 }
172 if (!present) {
173 /* (re-)allocate space if necessary. */
174 if (ino->record_count % 8 == 0) {
175 int *new;
176
177 new = ntfs_malloc((ino->record_count + 8) *
178 sizeof(int));
179 if (!new)
180 return -ENOMEM;
181 if (ino->records) {
182 for (i = 0; i < ino->record_count; i++)
183 new[i] = ino->records[i];
184 ntfs_free(ino->records);
185 }
186 ino->records = new;
187 }
188 ino->records[ino->record_count] = mftno;
189 ino->record_count++;
190 }
191 if (NTFS_GETU32(attr) == -1) {
192 ntfs_debug(DEBUG_FILE3, "ntfs_insert_mft_attribute: attribute "
193 "type is -1.\n");
194 return 0;
195 }
196 error = ntfs_insert_attribute(ino, attr);
197 if (error)
198 return error;
199 return 0;
200 }
201
202 /* Read and insert all the attributes of an 'attribute list' attribute.
203 * Return the number of remaining bytes in *plen. */
204 static int parse_attributes(ntfs_inode *ino, ntfs_u8 *alist, int *plen)
205 {
206 ntfs_u8 *mft, *attr;
207 int mftno, l, error;
208 int last_mft = -1;
209 int len = *plen;
210
211 if (!ino->attr) {
212 ntfs_error("parse_attributes: called on inode 0x%x without a "
213 "loaded base mft record.\n", ino->i_number);
214 return -EINVAL;
215 }
216 mft = ntfs_malloc(ino->vol->mft_record_size);
217 if (!mft)
218 return -ENOMEM;
219 while (len > 8) {
220 l = NTFS_GETU16(alist + 4);
221 if (l > len)
222 break;
223 /* Process an attribute description. */
224 mftno = NTFS_GETU32(alist + 0x10);
225 /* FIXME: The mft reference (alist + 0x10) is __s64.
226 * - Not a problem unless we encounter a huge partition.
227 * - Should be consistency checking the sequence numbers
228 * though! This should maybe happen in
229 * ntfs_read_mft_record() itself and a hotfix could
230 * then occur there or the user notified to run
231 * ntfsck. (AIA) */
232 if (mftno != ino->i_number && mftno != last_mft) {
233 last_mft = mftno;
234 error = ntfs_read_mft_record(ino->vol, mftno, mft);
235 if (error) {
236 ntfs_debug(DEBUG_FILE3, "parse_attributes: "
237 "ntfs_read_mft_record(mftno = 0x%x) "
238 "failed\n", mftno);
239 ntfs_free(mft);
240 return error;
241 }
242 }
243 attr = ntfs_find_attr_in_mft_rec(
244 ino->vol, /* ntfs volume */
245 mftno == ino->i_number ?/* mft record is: */
246 ino->attr: /* base record */
247 mft, /* extension record */
248 NTFS_GETU32(alist + 0), /* type */
249 (wchar_t*)(alist + alist[7]), /* name */
250 alist[6], /* name length */
251 1, /* ignore case */
252 NTFS_GETU16(alist + 24) /* instance number */
253 );
254 if (!attr) {
255 ntfs_error("parse_attributes: mft records 0x%x and/or "
256 "0x%x corrupt!\n", ino->i_number, mftno);
257 ntfs_free(mft);
258 return -EINVAL; /* FIXME: Better error code? (AIA) */
259 }
260 error = ntfs_insert_mft_attribute(ino, mftno, attr);
261 if (error) {
262 ntfs_debug(DEBUG_FILE3, "parse_attributes: "
263 "ntfs_insert_mft_attribute(mftno 0x%x, "
264 "attribute type 0x%x) failed\n", mftno,
265 NTFS_GETU32(alist + 0));
266 ntfs_free(mft);
267 return error;
268 }
269 len -= l;
270 alist += l;
271 }
272 ntfs_free(mft);
273 *plen = len;
274 return 0;
275 }
276
277 static void ntfs_load_attributes(ntfs_inode* ino)
278 {
279 ntfs_attribute *alist;
280 int datasize;
281 int offset, len, delta;
282 char *buf;
283 ntfs_volume *vol = ino->vol;
284
285 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 1\n", ino->i_number);
286 if (ntfs_insert_mft_attributes(ino, ino->attr, ino->i_number))
287 return;
288 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 2\n", ino->i_number);
289 alist = ntfs_find_attr(ino, vol->at_attribute_list, 0);
290 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 3\n", ino->i_number);
291 if (!alist)
292 return;
293 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 4\n", ino->i_number);
294 datasize = alist->size;
295 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: alist->size = 0x%x\n",
296 ino->i_number, alist->size);
297 if (alist->resident) {
298 parse_attributes(ino, alist->d.data, &datasize);
299 return;
300 }
301 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 5\n", ino->i_number);
302 buf = ntfs_malloc(1024);
303 if (!buf) /* FIXME: Should be passing error code to caller. (AIA) */
304 return;
305 delta = 0;
306 for (offset = 0; datasize; datasize -= len, offset += len) {
307 ntfs_io io;
308
309 io.fn_put = ntfs_put;
310 io.fn_get = 0;
311 io.param = buf + delta;
312 len = 1024 - delta;
313 if (len > datasize)
314 len = datasize;
315 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: len = %i\n",
316 ino->i_number, len);
317 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: delta = %i\n",
318 ino->i_number, delta);
319 io.size = len;
320 if (ntfs_read_attr(ino, vol->at_attribute_list, 0, offset,
321 &io))
322 ntfs_error("error in load_attributes\n");
323 delta += len;
324 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: after += len, "
325 "delta = %i\n", ino->i_number, delta);
326 parse_attributes(ino, buf, &delta);
327 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: after "
328 "parse_attr, delta = %i\n", ino->i_number,
329 delta);
330 if (delta)
331 /* Move remaining bytes to buffer start. */
332 ntfs_memmove(buf, buf + len - delta, delta);
333 }
334 ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 6\n", ino->i_number);
335 ntfs_free(buf);
336 }
337
338 int ntfs_init_inode(ntfs_inode *ino, ntfs_volume *vol, int inum)
339 {
340 char *buf;
341 int error;
342
343 ntfs_debug(DEBUG_FILE1, "Initializing inode 0x%x\n", inum);
344 ino->i_number = inum;
345 ino->vol = vol;
346 ino->attr = buf = ntfs_malloc(vol->mft_record_size);
347 if (!buf)
348 return -ENOMEM;
349 error = ntfs_read_mft_record(vol, inum, ino->attr);
350 if (error) {
351 ntfs_debug(DEBUG_OTHER, "Init inode: 0x%x failed\n", inum);
352 return error;
353 }
354 ntfs_debug(DEBUG_FILE2, "Init inode: got mft 0x%x\n", inum);
355 ino->sequence_number = NTFS_GETU16(buf + 0x10);
356 ino->attr_count = 0;
357 ino->record_count = 0;
358 ino->records = 0;
359 ino->attrs = 0;
360 ntfs_load_attributes(ino);
361 ntfs_debug(DEBUG_FILE2, "Init inode: done 0x%x\n", inum);
362 return 0;
363 }
364
365 void ntfs_clear_inode(ntfs_inode *ino)
366 {
367 int i;
368 if (!ino->attr) {
369 ntfs_error("ntfs_clear_inode: double free\n");
370 return;
371 }
372 ntfs_free(ino->attr);
373 ino->attr = 0;
374 ntfs_free(ino->records);
375 ino->records = 0;
376 for (i = 0; i < ino->attr_count; i++) {
377 if (ino->attrs[i].name)
378 ntfs_free(ino->attrs[i].name);
379 if (ino->attrs[i].resident) {
380 if (ino->attrs[i].d.data)
381 ntfs_free(ino->attrs[i].d.data);
382 } else {
383 if (ino->attrs[i].d.r.runlist)
384 ntfs_vfree(ino->attrs[i].d.r.runlist);
385 }
386 }
387 ntfs_free(ino->attrs);
388 ino->attrs = 0;
389 }
390
391 /* Check and fixup a MFT record. */
392 int ntfs_check_mft_record(ntfs_volume *vol, char *record)
393 {
394 return ntfs_fixup_record(record, "FILE", vol->mft_record_size);
395 }
396
397 /* Return (in result) the value indicating the next available attribute
398 * chunk number. Works for inodes w/o extension records only. */
399 int ntfs_allocate_attr_number(ntfs_inode *ino, int *result)
400 {
401 if (ino->record_count != 1)
402 return -EOPNOTSUPP;
403 *result = NTFS_GETU16(ino->attr + 0x28);
404 NTFS_PUTU16(ino->attr + 0x28, (*result) + 1);
405 return 0;
406 }
407
408 /* Find the location of an attribute in the inode. A name of NULL indicates
409 * unnamed attributes. Return pointer to attribute or NULL if not found. */
410 char *ntfs_get_attr(ntfs_inode *ino, int attr, char *name)
411 {
412 /* Location of first attribute. */
413 char *it = ino->attr + NTFS_GETU16(ino->attr + 0x14);
414 int type;
415 int len;
416
417 /* Only check for magic DWORD here, fixup should have happened before.*/
418 if (!IS_MFT_RECORD(ino->attr))
419 return 0;
420 do {
421 type = NTFS_GETU32(it);
422 len = NTFS_GETU16(it + 4);
423 /* We found the attribute type. Is the name correct, too? */
424 if (type == attr) {
425 int namelen = NTFS_GETU8(it + 9);
426 char *name_it, *n = name;
427 /* Match given name and attribute name if present.
428 Make sure attribute name is Unicode. */
429 if (!name) {
430 goto check_namelen;
431 } else if (namelen) {
432 for (name_it = it + NTFS_GETU16(it + 10);
433 namelen; n++, name_it += 2, namelen--)
434 if (*name_it != *n || name_it[1])
435 break;
436 check_namelen:
437 if (!namelen)
438 break;
439 }
440 }
441 it += len;
442 } while (type != -1); /* List of attributes ends with type -1. */
443 if (type == -1)
444 return 0;
445 return it;
446 }
447
448 __s64 ntfs_get_attr_size(ntfs_inode *ino, int type, char *name)
449 {
450 ntfs_attribute *attr = ntfs_find_attr(ino, type, name);
451 if (!attr)
452 return 0;
453 return
454 attr->size;
455 }
456
457 int ntfs_attr_is_resident(ntfs_inode *ino, int type, char *name)
458 {
459 ntfs_attribute *attr = ntfs_find_attr(ino, type, name);
460 if (!attr)
461 return 0;
462 return attr->resident;
463 }
464
465 /*
466 * A run is coded as a type indicator, an unsigned length, and a signed cluster
467 * offset.
468 * . To save space, length and offset are fields of variable length. The low
469 * nibble of the type indicates the width of the length :), the high nibble
470 * the width of the offset.
471 * . The first offset is relative to cluster 0, later offsets are relative to
472 * the previous cluster.
473 *
474 * This function decodes a run. Length is an output parameter, data and cluster
475 * are in/out parameters.
476 */
477 int ntfs_decompress_run(unsigned char **data, int *length,
478 ntfs_cluster_t *cluster, int *ctype)
479 {
480 unsigned char type = *(*data)++;
481 *ctype = 0;
482 switch (type & 0xF) {
483 case 1:
484 *length = NTFS_GETS8(*data);
485 break;
486 case 2:
487 *length = NTFS_GETS16(*data);
488 break;
489 case 3:
490 *length = NTFS_GETS24(*data);
491 break;
492 case 4:
493 *length = NTFS_GETS32(*data);
494 break;
495 /* Note: cases 5-8 are probably pointless to code, since how
496 * many runs > 4GB of length are there? At the most, cases 5
497 * and 6 are probably necessary, and would also require making
498 * length 64-bit throughout. */
499 default:
500 ntfs_error("Can't decode run type field 0x%x\n", type);
501 return -1;
502 }
503 // ntfs_debug(DEBUG_FILE3, "ntfs_decompress_run: length = 0x%x\n",*length);
504 if (*length < 0)
505 {
506 ntfs_error("Negative run length decoded\n");
507 return -1;
508 }
509 *data += (type & 0xF);
510 switch (type & 0xF0) {
511 case 0:
512 *ctype = 2;
513 break;
514 case 0x10:
515 *cluster += NTFS_GETS8(*data);
516 break;
517 case 0x20:
518 *cluster += NTFS_GETS16(*data);
519 break;
520 case 0x30:
521 *cluster += NTFS_GETS24(*data);
522 break;
523 case 0x40:
524 *cluster += NTFS_GETS32(*data);
525 break;
526 #if 0 /* Keep for future, in case ntfs_cluster_t ever becomes 64bit. */
527 case 0x50:
528 *cluster += NTFS_GETS40(*data);
529 break;
530 case 0x60:
531 *cluster += NTFS_GETS48(*data);
532 break;
533 case 0x70:
534 *cluster += NTFS_GETS56(*data);
535 break;
536 case 0x80:
537 *cluster += NTFS_GETS64(*data);
538 break;
539 #endif
540 default:
541 ntfs_error("Can't decode run type field 0x%x\n", type);
542 return -1;
543 }
544 // ntfs_debug(DEBUG_FILE3, "ntfs_decompress_run: cluster = 0x%x\n",
545 // *cluster);
546 *data += (type >> 4);
547 return 0;
548 }
549
550 static void dump_runlist(const ntfs_runlist *rl, const int rlen);
551
552 /*
553 * FIXME: ntfs_readwrite_attr() has the effect of writing @dest to @offset of
554 * the attribute value of the attribute @attr in the in memory inode @ino.
555 * If the attribute value of @attr is non-resident the value's contents at
556 * @offset are actually written to disk (from @dest). The on disk mft record
557 * describing the non-resident attribute value is not updated!
558 * If the attribute value is resident then the value is written only in
559 * memory. The on disk mft record containing the value is not written to disk.
560 * A possible fix would be to call ntfs_update_inode() before returning. (AIA)
561 */
562 /* Reads l bytes of the attribute (attr, name) of ino starting at offset on
563 * vol into buf. Returns the number of bytes read in the ntfs_io struct.
564 * Returns 0 on success, errno on failure */
565 int ntfs_readwrite_attr(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset,
566 ntfs_io *dest)
567 {
568 int rnum, s_vcn, error, clustersizebits;
569 ntfs_cluster_t cluster, s_cluster, vcn, len;
570 __s64 l, chunk, copied;
571
572 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): %s 0x%x bytes at offset "
573 "0x%Lx %s inode 0x%x, attr type 0x%x.\n",
574 dest->do_read ? "Read" : "Write", dest->size, offset,
575 dest->do_read ? "from" : "to", ino->i_number,
576 attr->type);
577 l = dest->size;
578 if (l == 0)
579 return 0;
580 if (dest->do_read) {
581 /* If read _starts_ beyond end of stream, return nothing. */
582 if (offset >= attr->size) {
583 dest->size = 0;
584 return 0;
585 }
586 /* If read _extends_ beyond end of stream, return as much
587 * initialised data as we have. */
588 if (offset + l >= attr->size)
589 l = dest->size = attr->size - offset;
590 } else {
591 /*
592 * If write extends beyond _allocated_ size, extend attribute,
593 * updating attr->allocated and attr->size in the process. (AIA)
594 */
595 if (offset + l > attr->allocated) {
596 error = ntfs_resize_attr(ino, attr, offset + l);
597 if (error)
598 return error;
599 } else if (offset + l > attr->size)
600 /* If amount of data has increased: update. */
601 attr->size = offset + l;
602 /* If amount of initialised data has increased: update. */
603 if (offset + l > attr->initialized) {
604 /* FIXME: Zero-out the section between the old
605 * initialised length and the write start. (AIA) */
606 attr->initialized = offset + l;
607 }
608 }
609 if (attr->resident) {
610 if (dest->do_read)
611 dest->fn_put(dest, (ntfs_u8*)attr->d.data + offset, l);
612 else
613 dest->fn_get((ntfs_u8*)attr->d.data + offset, dest, l);
614 dest->size = l;
615 return 0;
616 }
617 if (dest->do_read) {
618 /* Read uninitialized data. */
619 if (offset >= attr->initialized)
620 return ntfs_read_zero(dest, l);
621 if (offset + l > attr->initialized) {
622 dest->size = chunk = offset + l - attr->initialized;
623 error = ntfs_readwrite_attr(ino, attr, offset, dest);
624 if (error)
625 return error;
626 return ntfs_read_zero(dest, l - chunk);
627 }
628 if (attr->flags & ATTR_IS_COMPRESSED)
629 return ntfs_read_compressed(ino, attr, offset, dest);
630 } else {
631 if (attr->flags & ATTR_IS_COMPRESSED)
632 return ntfs_write_compressed(ino, attr, offset, dest);
633 }
634 vcn = 0;
635 clustersizebits = ino->vol->cluster_size_bits;
636 s_vcn = offset >> clustersizebits;
637 for (rnum = 0; rnum < attr->d.r.len &&
638 vcn + attr->d.r.runlist[rnum].len <= s_vcn; rnum++)
639 vcn += attr->d.r.runlist[rnum].len;
640 if (rnum == attr->d.r.len) {
641 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): EOPNOTSUPP: "
642 "inode = 0x%x, rnum = %i, offset = 0x%Lx, vcn = 0x%x, "
643 "s_vcn = 0x%x.\n", ino->i_number, rnum, offset, vcn,
644 s_vcn);
645 dump_runlist(attr->d.r.runlist, attr->d.r.len);
646 /*FIXME: Should extend runlist. */
647 return -EOPNOTSUPP;
648 }
649 copied = 0;
650 while (l) {
651 s_vcn = offset >> clustersizebits;
652 cluster = attr->d.r.runlist[rnum].lcn;
653 len = attr->d.r.runlist[rnum].len;
654 s_cluster = cluster + s_vcn - vcn;
655 chunk = ((__s64)(vcn + len) << clustersizebits) - offset;
656 if (chunk > l)
657 chunk = l;
658 dest->size = chunk;
659 error = ntfs_getput_clusters(ino->vol, s_cluster, offset -
660 ((__s64)s_vcn << clustersizebits), dest);
661 if (error) {
662 ntfs_error("Read/write error.\n");
663 dest->size = copied;
664 return error;
665 }
666 l -= chunk;
667 copied += chunk;
668 offset += chunk;
669 if (l && offset >= ((__s64)(vcn + len) << clustersizebits)) {
670 rnum++;
671 vcn += len;
672 cluster = attr->d.r.runlist[rnum].lcn;
673 len = attr->d.r.runlist[rnum].len;
674 }
675 }
676 dest->size = copied;
677 return 0;
678 }
679
680 int ntfs_read_attr(ntfs_inode *ino, int type, char *name, __s64 offset,
681 ntfs_io *buf)
682 {
683 ntfs_attribute *attr;
684
685 buf->do_read = 1;
686 attr = ntfs_find_attr(ino, type, name);
687 if (!attr) {
688 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): attr 0x%x not found "
689 "in inode 0x%x\n", type, ino->i_number);
690 return -EINVAL;
691 }
692 return ntfs_readwrite_attr(ino, attr, offset, buf);
693 }
694
695 int ntfs_write_attr(ntfs_inode *ino, int type, char *name, __s64 offset,
696 ntfs_io *buf)
697 {
698 ntfs_attribute *attr;
699
700 buf->do_read = 0;
701 attr = ntfs_find_attr(ino, type, name);
702 if (!attr) {
703 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): attr 0x%x not found "
704 "in inode 0x%x\n", type, ino->i_number);
705 return -EINVAL;
706 }
707 return ntfs_readwrite_attr(ino, attr, offset, buf);
708 }
709
710 int ntfs_vcn_to_lcn(ntfs_inode *ino, int vcn)
711 {
712 int rnum;
713 ntfs_attribute *data;
714
715 data = ntfs_find_attr(ino, ino->vol->at_data, 0);
716 /* It's hard to give an error code. */
717 if (!data || data->resident || data->flags & (ATTR_IS_COMPRESSED |
718 ATTR_IS_ENCRYPTED))
719 return -1;
720 if (data->size <= (__s64)vcn << ino->vol->cluster_size_bits)
721 return -1;
722 /*
723 * For Linux, block number 0 represents a hole. - No problem as we do
724 * not support bmap in any form whatsoever. The FIBMAP sys call is
725 * deprecated anyway and NTFS is not a block based file system so
726 * allowing bmapping is complete and utter garbage IMO. Use mmap once
727 * we implement it... (AIA)
728 */
729 if (data->initialized <= (__s64)vcn << ino->vol->cluster_size_bits)
730 return 0;
731 for (rnum = 0; rnum < data->d.r.len &&
732 vcn >= data->d.r.runlist[rnum].len; rnum++)
733 vcn -= data->d.r.runlist[rnum].len;
734 /* We need to cope with sparse runs. (AIA) */
735 return data->d.r.runlist[rnum].lcn + vcn;
736 }
737
738 static int allocate_store(ntfs_volume *vol, ntfs_disk_inode *store, int count)
739 {
740 int i;
741
742 if (store->count > count)
743 return 0;
744 if (store->size < count) {
745 ntfs_mft_record *n = ntfs_malloc((count + 4) *
746 sizeof(ntfs_mft_record));
747 if (!n)
748 return -ENOMEM;
749 if (store->size) {
750 for (i = 0; i < store->size; i++)
751 n[i] = store->records[i];
752 ntfs_free(store->records);
753 }
754 store->size = count + 4;
755 store->records = n;
756 }
757 for (i = store->count; i < count; i++) {
758 store->records[i].record = ntfs_malloc(vol->mft_record_size);
759 if (!store->records[i].record)
760 return -ENOMEM;
761 store->count++;
762 }
763 return 0;
764 }
765
766 static void deallocate_store(ntfs_disk_inode* store)
767 {
768 int i;
769
770 for (i = 0; i < store->count; i++)
771 ntfs_free(store->records[i].record);
772 ntfs_free(store->records);
773 store->count = store->size = 0;
774 store->records = 0;
775 }
776
777 /**
778 * layout_runs - compress runlist into mapping pairs array
779 * @attr: attribute containing the runlist to compress
780 * @rec: destination buffer to hold the mapping pairs array
781 * @offs: current position in @rec (in/out variable)
782 * @size: size of the buffer @rec
783 *
784 * layout_runs walks the runlist in @attr, compresses it and writes it out the
785 * resulting mapping pairs array into @rec (up to a maximum of @size bytes are
786 * written). On entry @offs is the offset in @rec at which to begin writing the
787 * mapping pairs array. On exit, it contains the offset in @rec of the first
788 * byte after the end of the mapping pairs array.
789 */
790 static int layout_runs(ntfs_attribute *attr, char *rec, int *offs, int size)
791 {
792 int i, len, offset, coffs;
793 /* ntfs_cluster_t MUST be signed! (AIA) */
794 ntfs_cluster_t cluster, rclus;
795 ntfs_runlist *rl = attr->d.r.runlist;
796 cluster = 0;
797 offset = *offs;
798 for (i = 0; i < attr->d.r.len; i++) {
799 /*
800 * We cheat with this check on the basis that lcn will never
801 * be less than -1 and the lcn delta will fit in signed
802 * 32-bits (ntfs_cluster_t). (AIA)
803 */
804 if (rl[i].lcn < (ntfs_cluster_t)-1) {
805 ntfs_error("layout_runs() encountered an out of bounds "
806 "cluster delta, lcn = %i.\n",
807 rl[i].lcn);
808 return -ERANGE;
809 }
810 rclus = rl[i].lcn - cluster;
811 len = rl[i].len;
812 rec[offset] = 0;
813 if (offset + 9 > size)
814 return -E2BIG; /* It might still fit, but this
815 * simplifies testing. */
816 /*
817 * Run length is stored as signed number, so deal with it
818 * properly, i.e. observe that a negative number will have all
819 * its most significant bits set to 1 but we don't store that
820 * in the mapping pairs array. We store the smallest type of
821 * negative number required, thus in the first if we check
822 * whether len fits inside a signed byte and if so we store it
823 * as such, the next ifs check for a signed short, then a signed
824 * 24-bit and finally the full blown signed 32-bit. Same goes
825 * for rlus below. (AIA)
826 */
827 if (len >= -0x80 && len <= 0x7f) {
828 NTFS_PUTU8(rec + offset + 1, len & 0xff);
829 coffs = 1;
830 } else if (len >= -0x8000 && len <= 0x7fff) {
831 NTFS_PUTU16(rec + offset + 1, len & 0xffff);
832 coffs = 2;
833 } else if (len >= -0x800000 && len <= 0x7fffff) {
834 NTFS_PUTU24(rec + offset + 1, len & 0xffffff);
835 coffs = 3;
836 } else /* if (len >= -0x80000000LL && len <= 0x7fffffff */ {
837 NTFS_PUTU32(rec + offset + 1, len);
838 coffs = 4;
839 } /* else ... FIXME: When len becomes 64-bit we need to extend
840 * the else if () statements. (AIA) */
841 *(rec + offset) |= coffs++;
842 if (rl[i].lcn == (ntfs_cluster_t)-1) /* Compressed run. */
843 /* Nothing */;
844 else if (rclus >= -0x80 && rclus <= 0x7f) {
845 *(rec + offset) |= 0x10;
846 NTFS_PUTS8(rec + offset + coffs, rclus & 0xff);
847 coffs += 1;
848 } else if (rclus >= -0x8000 && rclus <= 0x7fff) {
849 *(rec + offset) |= 0x20;
850 NTFS_PUTS16(rec + offset + coffs, rclus & 0xffff);
851 coffs += 2;
852 } else if (rclus >= -0x800000 && rclus <= 0x7fffff) {
853 *(rec + offset) |= 0x30;
854 NTFS_PUTS24(rec + offset + coffs, rclus & 0xffffff);
855 coffs += 3;
856 } else /* if (rclus >= -0x80000000LL && rclus <= 0x7fffffff)*/ {
857 *(rec + offset) |= 0x40;
858 NTFS_PUTS32(rec + offset + coffs, rclus
859 /* & 0xffffffffLL */);
860 coffs += 4;
861 } /* FIXME: When rclus becomes 64-bit.
862 else if (rclus >= -0x8000000000 && rclus <= 0x7FFFFFFFFF) {
863 *(rec + offset) |= 0x50;
864 NTFS_PUTS40(rec + offset + coffs, rclus &
865 0xffffffffffLL);
866 coffs += 5;
867 } else if (rclus >= -0x800000000000 &&
868 rclus <= 0x7FFFFFFFFFFF) {
869 *(rec + offset) |= 0x60;
870 NTFS_PUTS48(rec + offset + coffs, rclus &
871 0xffffffffffffLL);
872 coffs += 6;
873 } else if (rclus >= -0x80000000000000 &&
874 rclus <= 0x7FFFFFFFFFFFFF) {
875 *(rec + offset) |= 0x70;
876 NTFS_PUTS56(rec + offset + coffs, rclus &
877 0xffffffffffffffLL);
878 coffs += 7;
879 } else {
880 *(rec + offset) |= 0x80;
881 NTFS_PUTS64(rec + offset + coffs, rclus);
882 coffs += 8;
883 } */
884 offset += coffs;
885 if (rl[i].lcn)
886 cluster = rl[i].lcn;
887 }
888 if (offset >= size)
889 return -E2BIG;
890 /* Terminating null. */
891 *(rec + offset++) = 0;
892 *offs = offset;
893 return 0;
894 }
895
896 static void count_runs(ntfs_attribute *attr, char *buf)
897 {
898 ntfs_u32 first, count, last, i;
899
900 first = 0;
901 for (i = 0, count = 0; i < attr->d.r.len; i++)
902 count += attr->d.r.runlist[i].len;
903 last = first + count - 1;
904 NTFS_PUTU64(buf + 0x10, first);
905 NTFS_PUTU64(buf + 0x18, last);
906 }
907
908 /**
909 * layout_attr - convert in memory attribute to on disk attribute record
910 * @attr: in memory attribute to convert
911 * @buf: destination buffer for on disk attribute record
912 * @size: size of the destination buffer
913 * @psize: size of converted on disk attribute record (out variable)
914 *
915 * layout_attr() takes the attribute @attr and converts it into the appropriate
916 * on disk structure, writing it into @buf (up to @size bytes are written).
917 *
918 * On success we return 0 and set @*psize to the actual byte size of the on-
919 * disk attribute that was written into @buf.
920 */
921 static int layout_attr(ntfs_attribute *attr, char *buf, int size, int *psize)
922 {
923 int nameoff, hdrsize, asize;
924
925 if (attr->resident) {
926 nameoff = 0x18;
927 hdrsize = (nameoff + 2 * attr->namelen + 7) & ~7;
928 asize = (hdrsize + attr->size + 7) & ~7;
929 if (size < asize)
930 return -E2BIG;
931 NTFS_PUTU32(buf + 0x10, attr->size);
932 NTFS_PUTU8(buf + 0x16, attr->indexed);
933 NTFS_PUTU16(buf + 0x14, hdrsize);
934 if (attr->size)
935 ntfs_memcpy(buf + hdrsize, attr->d.data, attr->size);
936 } else {
937 int error;
938
939 if (attr->flags & ATTR_IS_COMPRESSED)
940 nameoff = 0x48;
941 else
942 nameoff = 0x40;
943 hdrsize = (nameoff + 2 * attr->namelen + 7) & ~7;
944 if (size < hdrsize)
945 return -E2BIG;
946 /* Make asize point at the end of the attribute record header,
947 i.e. at the beginning of the mapping pairs array. */
948 asize = hdrsize;
949 error = layout_runs(attr, buf, &asize, size);
950 /* Now, asize points one byte beyond the end of the mapping
951 pairs array. */
952 if (error)
953 return error;
954 /* The next attribute has to begin on 8-byte boundary. */
955 asize = (asize + 7) & ~7;
956 /* FIXME: fragments */
957 count_runs(attr, buf);
958 NTFS_PUTU16(buf + 0x20, hdrsize);
959 NTFS_PUTU16(buf + 0x22, attr->cengine);
960 NTFS_PUTU32(buf + 0x24, 0);
961 NTFS_PUTS64(buf + 0x28, attr->allocated);
962 NTFS_PUTS64(buf + 0x30, attr->size);
963 NTFS_PUTS64(buf + 0x38, attr->initialized);
964 if (attr->flags & ATTR_IS_COMPRESSED)
965 NTFS_PUTS64(buf + 0x40, attr->compsize);
966 }
967 NTFS_PUTU32(buf, attr->type);
968 NTFS_PUTU32(buf + 4, asize);
969 NTFS_PUTU8(buf + 8, attr->resident ? 0 : 1);
970 NTFS_PUTU8(buf + 9, attr->namelen);
971 NTFS_PUTU16(buf + 0xa, nameoff);
972 NTFS_PUTU16(buf + 0xc, attr->flags);
973 NTFS_PUTU16(buf + 0xe, attr->attrno);
974 if (attr->namelen)
975 ntfs_memcpy(buf + nameoff, attr->name, 2 * attr->namelen);
976 *psize = asize;
977 return 0;
978 }
979
980 /**
981 * layout_inode - convert an in-memory inode into on disk mft record(s)
982 * @ino: in memory inode to convert
983 * @store: on disk inode, contain buffers for the on disk mft record(s)
984 *
985 * layout_inode takes the in memory inode @ino, converts it into a (sequence of)
986 * mft record(s) and writes them to the appropriate buffers in the @store.
987 *
988 * Return 0 on success,
989 * the required mft record count (>0) if the inode does not fit,
990 * -ENOMEM if memory allocation problem, or
991 * -EOPNOTSUP if beyond our capabilities.
992 *
993 * TODO: We at the moment do not support extension mft records. (AIA)
994 */
995 int layout_inode(ntfs_inode *ino, ntfs_disk_inode *store)
996 {
997 int offset, i, size, psize, error, count, recno;
998 ntfs_attribute *attr;
999 unsigned char *rec;
1000
1001 error = allocate_store(ino->vol, store, ino->record_count);
1002 if (error)
1003 return error;
1004 size = ino->vol->mft_record_size;
1005 count = i = 0;
1006 do {
1007 if (count < ino->record_count) {
1008 recno = ino->records[count];
1009 } else {
1010 error = allocate_store(ino->vol, store, count + 1);
1011 if (error)
1012 return error;
1013 recno = -1;
1014 }
1015 /*
1016 * FIXME: We need to support extension records properly.
1017 * At the moment they wouldn't work. Probably would "just" get
1018 * corrupted if we write to them... (AIA)
1019 */
1020 store->records[count].recno = recno;
1021 rec = store->records[count].record;
1022 count++;
1023 /* Copy mft record header. */
1024 offset = NTFS_GETU16(ino->attr + 0x14); /* attrs_offset */
1025 ntfs_memcpy(rec, ino->attr, offset);
1026 /* Copy attributes. */
1027 while (i < ino->attr_count) {
1028 attr = ino->attrs + i;
1029 error = layout_attr(attr, rec + offset,
1030 size - offset - 8, &psize);
1031 if (error == -E2BIG && offset != NTFS_GETU16(ino->attr
1032 + 0x14))
1033 break;
1034 if (error)
1035 return error;
1036 offset += psize;
1037 i++;
1038 }
1039 /* Terminating attribute. */
1040 NTFS_PUTU32(rec + offset, 0xFFFFFFFF);
1041 offset += 4;
1042 NTFS_PUTU32(rec + offset, 0);
1043 offset += 4;
1044 NTFS_PUTU32(rec + 0x18, offset);
1045 } while (i < ino->attr_count || count < ino->record_count);
1046 return count - ino->record_count;
1047 }
1048
1049 /*
1050 * FIXME: ntfs_update_inode() calls layout_inode() to create the mft record on
1051 * disk structure corresponding to the inode @ino. After that, ntfs_write_attr()
1052 * is called to write out the created mft record to disk.
1053 * We shouldn't need to re-layout every single time we are updating an mft
1054 * record. No wonder the ntfs driver is slow like hell. (AIA)
1055 */
1056 int ntfs_update_inode(ntfs_inode *ino)
1057 {
1058 int error, i;
1059 ntfs_disk_inode store;
1060 ntfs_io io;
1061
1062 ntfs_bzero(&store, sizeof(store));
1063 error = layout_inode(ino, &store);
1064 if (error == -E2BIG) {
1065 i = ntfs_split_indexroot(ino);
1066 if (i != -ENOTDIR) {
1067 if (!i)
1068 i = layout_inode(ino, &store);
1069 error = i;
1070 }
1071 }
1072 if (error == -E2BIG) {
1073 error = ntfs_attr_allnonresident(ino);
1074 if (!error)
1075 error = layout_inode(ino, &store);
1076 }
1077 if (error > 0) {
1078 /* FIXME: Introduce extension records. */
1079 error = -E2BIG;
1080 }
1081 if (error) {
1082 if (error == -E2BIG)
1083 ntfs_error("Cannot handle saving inode 0x%x.\n",
1084 ino->i_number);
1085 deallocate_store(&store);
1086 return error;
1087 }
1088 io.fn_get = ntfs_get;
1089 io.fn_put = 0;
1090 for (i = 0; i < store.count; i++) {
1091 error = ntfs_insert_fixups(store.records[i].record,
1092 ino->vol->mft_record_size);
1093 if (error) {
1094 printk(KERN_ALERT "NTFS: ntfs_update_inode() caught "
1095 "corrupt %s mtf record ntfs record "
1096 "header. Refusing to write corrupt "
1097 "data to disk. Unmount and run chkdsk "
1098 "immediately!\n", i ? "extension":
1099 "base");
1100 deallocate_store(&store);
1101 return -EIO;
1102 }
1103 io.param = store.records[i].record;
1104 io.size = ino->vol->mft_record_size;
1105 error = ntfs_write_attr(ino->vol->mft_ino, ino->vol->at_data,
1106 0, (__s64)store.records[i].recno <<
1107 ino->vol->mft_record_size_bits, &io);
1108 if (error || io.size != ino->vol->mft_record_size) {
1109 /* Big trouble, partially written file. */
1110 ntfs_error("Please unmount: Write error in inode "
1111 "0x%x\n", ino->i_number);
1112 deallocate_store(&store);
1113 return error ? error : -EIO;
1114 }
1115 }
1116 deallocate_store(&store);
1117 return 0;
1118 }
1119
1120 void ntfs_decompress(unsigned char *dest, unsigned char *src, ntfs_size_t l)
1121 {
1122 int head, comp;
1123 int copied = 0;
1124 unsigned char *stop;
1125 int bits;
1126 int tag = 0;
1127 int clear_pos;
1128
1129 while (1) {
1130 head = NTFS_GETU16(src) & 0xFFF;
1131 /* High bit indicates that compression was performed. */
1132 comp = NTFS_GETU16(src) & 0x8000;
1133 src += 2;
1134 stop = src + head;
1135 bits = 0;
1136 clear_pos = 0;
1137 if (head == 0)
1138 /* Block is not used. */
1139 return;/* FIXME: copied */
1140 if (!comp) { /* uncompressible */
1141 ntfs_memcpy(dest, src, 0x1000);
1142 dest += 0x1000;
1143 copied += 0x1000;
1144 src += 0x1000;
1145 if (l == copied)
1146 return;
1147 continue;
1148 }
1149 while (src <= stop) {
1150 if (clear_pos > 4096) {
1151 ntfs_error("Error 1 in decompress\n");
1152 return;
1153 }
1154 if (!bits) {
1155 tag = NTFS_GETU8(src);
1156 bits = 8;
1157 src++;
1158 if (src > stop)
1159 break;
1160 }
1161 if (tag & 1) {
1162 int i, len, delta, code, lmask, dshift;
1163 code = NTFS_GETU16(src);
1164 src += 2;
1165 if (!clear_pos) {
1166 ntfs_error("Error 2 in decompress\n");
1167 return;
1168 }
1169 for (i = clear_pos - 1, lmask = 0xFFF,
1170 dshift = 12; i >= 0x10; i >>= 1) {
1171 lmask >>= 1;
1172 dshift--;
1173 }
1174 delta = code >> dshift;
1175 len = (code & lmask) + 3;
1176 for (i = 0; i < len; i++) {
1177 dest[clear_pos] = dest[clear_pos -
1178 delta - 1];
1179 clear_pos++;
1180 copied++;
1181 if (copied==l)
1182 return;
1183 }
1184 } else {
1185 dest[clear_pos++] = NTFS_GETU8(src);
1186 src++;
1187 copied++;
1188 if (copied==l)
1189 return;
1190 }
1191 tag >>= 1;
1192 bits--;
1193 }
1194 dest += clear_pos;
1195 }
1196 }
1197
1198 /*
1199 * NOTE: None of the ntfs_*_bit functions is atomic! But we don't need
1200 * them atomic at present as we never operate on shared/cached bitmaps.
1201 */
/* Return 1 if bit number @bit of the bitmap starting at @byte is set, else 0. */
static __inline__ int ntfs_test_bit(unsigned char *byte, const int bit)
{
	const unsigned char *cell = byte + (bit >> 3);
	const int mask = 1 << (bit & 7);

	return (*cell & mask) != 0;
}
1206
/* Set bit number @bit in the bitmap starting at @byte (not atomic). */
static __inline__ void ntfs_set_bit(unsigned char *byte, const int bit)
{
	unsigned char *cell = byte + (bit >> 3);
	const int mask = 1 << (bit & 7);

	*cell |= mask;
}
1211
/* Clear bit number @bit in the bitmap starting at @byte (not atomic). */
static __inline__ void ntfs_clear_bit(unsigned char *byte, const int bit)
{
	unsigned char *cell = byte + (bit >> 3);
	const int mask = 1 << (bit & 7);

	*cell &= ~mask;
}
1216
/*
 * Clear bit number @bit in the bitmap starting at @byte and return the value
 * (0 or 1) the bit had before. Like its siblings this is not atomic.
 */
static __inline__ int ntfs_test_and_clear_bit(unsigned char *byte,
		const int bit)
{
	unsigned char *cell = &byte[bit >> 3];
	const int mask = 1 << (bit & 7);
	const int was_set = (*cell & mask) != 0;

	*cell &= ~mask;
	return was_set;
}
1226
1227 static void dump_runlist(const ntfs_runlist *rl, const int rlen)
1228 {
1229 #ifdef DEBUG
1230 int i;
1231 ntfs_cluster_t ct;
1232
1233 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): rlen = %i.\n", rlen);
1234 ntfs_debug(DEBUG_OTHER, "VCN LCN Run length\n");
1235 for (i = 0, ct = 0; i < rlen; ct += rl[i++].len) {
1236 if (rl[i].lcn == (ntfs_cluster_t)-1)
1237 ntfs_debug(DEBUG_OTHER, "0x%-8x LCN_HOLE 0x%-8x "
1238 "(%s)\n", ct, rl[i].len, rl[i].len ?
1239 "sparse run" : "run list end");
1240 else
1241 ntfs_debug(DEBUG_OTHER, "0x%-8x 0x%-8x 0x%-8x%s\n", ct,
1242 rl[i].lcn, rl[i].len, rl[i].len &&
1243 i + 1 < rlen ? "" : " (run list end)");
1244 if (!rl[i].len)
1245 break;
1246 }
1247 #endif
1248 }
1249
1250 /**
1251 * splice_runlists - splice two run lists into one
1252 * @rl1: pointer to address of first run list
1253 * @r1len: number of elementfs in first run list
1254 * @rl2: pointer to second run list
1255 * @r2len: number of elements in second run list
1256 *
1257 * Append the run list @rl2 to the run list *@rl1 and return the result in
1258 * *@rl1 and *@r1len.
1259 *
1260 * Return 0 on success or -errno on error, in which case *@rl1 and *@r1len are
1261 * left untouched.
1262 *
1263 * The only possible error code at the moment is -ENOMEM and only happens if
1264 * there is insufficient memory to allocate the new run list (only happens
1265 * when size of (rl1 + rl2) > allocated size of rl1).
1266 */
1267 int splice_runlists(ntfs_runlist **rl1, int *r1len, const ntfs_runlist *rl2,
1268 int r2len)
1269 {
1270 ntfs_runlist *rl;
1271 int rlen, rl_size, rl2_pos;
1272
1273 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Entering with *r1len = %i, "
1274 "r2len = %i.\n", *r1len, r2len);
1275 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Dumping 1st runlist.\n");
1276 if (*rl1)
1277 dump_runlist(*rl1, *r1len);
1278 else
1279 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Not present.\n");
1280 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Dumping 2nd runlist.\n");
1281 dump_runlist(rl2, r2len);
1282 rlen = *r1len + r2len + 1;
1283 rl_size = (rlen * sizeof(ntfs_runlist) + PAGE_SIZE - 1) &
1284 PAGE_MASK;
1285 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): rlen = %i, rl_size = %i.\n",
1286 rlen, rl_size);
1287 /* Do we have enough space? */
1288 if (rl_size <= ((*r1len * sizeof(ntfs_runlist) + PAGE_SIZE - 1) &
1289 PAGE_MASK)) {
1290 /* Have enough space already. */
1291 rl = *rl1;
1292 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Have enough space "
1293 "already.\n");
1294 } else {
1295 /* Need more space. Reallocate. */
1296 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Need more space.\n");
1297 rl = ntfs_vmalloc(rlen << sizeof(ntfs_runlist));
1298 if (!rl)
1299 return -ENOMEM;
1300 /* Copy over rl1. */
1301 ntfs_memcpy(rl, *rl1, *r1len * sizeof(ntfs_runlist));
1302 ntfs_vfree(*rl1);
1303 *rl1 = rl;
1304 }
1305 /* Reuse rl_size as the current position index into rl. */
1306 rl_size = *r1len - 1;
1307 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): rl_size = %i.\n");
1308 /* Coalesce neighbouring elements, if present. */
1309 rl2_pos = 0;
1310 if (rl[rl_size].lcn + rl[rl_size].len == rl2[rl2_pos].lcn) {
1311 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Coalescing adjacent "
1312 "runs.\n");
1313 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before: "
1314 "rl[rl_size].len = %i.\n", rl[rl_size].len);
1315 rl[rl_size].len += rl2[rl2_pos].len;
1316 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After: "
1317 "rl[rl_size].len = %i.\n", rl[rl_size].len);
1318 rl2_pos++;
1319 r2len--;
1320 rlen--;
1321 }
1322 rl_size++;
1323 /* Copy over rl2. */
1324 ntfs_memcpy(rl + rl_size, rl2 + rl2_pos, r2len * sizeof(ntfs_runlist));
1325 rlen--;
1326 rl[rlen].lcn = (ntfs_cluster_t)-1;
1327 rl[rlen].len = (ntfs_cluster_t)0;
1328 *r1len = rlen;
1329 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Dumping result runlist.\n");
1330 dump_runlist(*rl1, *r1len);
1331 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Returning with *r1len = "
1332 "%i.\n", rlen);
1333 return 0;
1334 }
1335
1336 /**
1337 * ntfs_alloc_mft_record - allocate an mft record
1338 * @vol: volume to allocate an mft record on
1339 * @result: the mft record number allocated
1340 *
1341 * Allocate a new mft record on disk. Return 0 on success or -ERRNO on error.
1342 * On success, *@result contains the allocated mft record number. On error,
1343 * *@result is -1UL.
1344 *
1345 * Note, this function doesn't actually set the mft record to be in use. This
1346 * is done by the caller, which at the moment is only ntfs_alloc_inode().
1347 *
1348 * To find a free mft record, we scan the mft bitmap for a zero bit. To
1349 * optimize this we start scanning at the place where we last stopped and we
1350 * perform wrap around when we reach the end. Note, we do not try to allocate
1351 * mft records below number 24 because numbers 0 to 15 are the defined system
1352 * files anyway and 16 to 24 are special in that they are used for storing
1353 * extension mft records for $MFT's $DATA attribute. This is required to avoid
1354 * the possibility of creating a run list with a circular dependence which once
1355 * written to disk can never be read in again. Windows will only use records
1356 * 16 to 24 for normal files if the volume is completely out of space. We never
1357 * use them which means that when the volume is really out of space we cannot
1358 * create any more files while Windows can still create up to 8 small files. We
1359 * can start doing this at some later time, doesn't matter much for now.
1360 *
1361 * When scanning the mft bitmap, we only search up to the last allocated mft
1362 * record. If there are no free records left in the range 24 to number of
1363 * allocated mft records, then we extend the mft data in order to create free
1364 * mft records. We extend the allocated size of $MFT/$DATA by 16 records at a
1365 * time or one cluster, if cluster size is above 16kiB. If there isn't
1366 * sufficient space to do this, we try to extend by a single mft record or one
1367 * cluster, if cluster size is above mft record size, but we only do this if
1368 * there is enough free space, which we know from the values returned by the
1369 * failed cluster allocation function when we tried to do the first allocation.
1370 *
1371 * No matter how many mft records we allocate, we initialize only the first
1372 * allocated mft record (incrementing mft data size and initialized size) and
1373 * return its number to the caller in @*result, unless there are less than 24
1374 * mft records, in which case we allocate and initialize mft records until we
1375 * reach record 24 which we consider as the first free mft record for use by
1376 * normal files.
1377 *
1378 * If during any stage we overflow the initialized data in the mft bitmap, we
1379 * extend the initialized size (and data size) by 8 bytes, allocating another
1380 * cluster if required. The bitmap data size has to be at least equal to the
1381 * number of mft records in the mft, but it can be bigger, in which case the
1382 * superfluous bits are padded with zeroes.
1383 *
1384 * Thus, when we return successfully (return value 0), we will have:
1385 * - initialized / extended the mft bitmap if necessary,
1386 * - initialized / extended the mft data if necessary,
1387 * - set the bit corresponding to the mft record being allocated in the
1388 * mft bitmap, and we will
1389 * - return the mft record number in @*result.
1390 *
1391 * On error (return value below zero), nothing will have changed. If we had
1392 * changed anything before the error occurred, we will have reverted back to
1393 * the starting state before returning to the caller. Thus, except for bugs,
1394 * we should always leave the volume in a consistent state when returning from
1395 * this function. NOTE: Small exception to this is that we set the bit in the
1396 * mft bitmap but we do not mark the mft record in use, which is inconsistent.
1397 * However, the caller will immediately add the wanted attributes to the mft
1398 * record, set it in use and write it out to disk, so there should be no
1399 * problem.
1400 *
1401 * Note, this function cannot make use of most of the normal functions, like
1402 * for example for attribute resizing, etc, because when the run list overflows
1403 * the base mft record and an attribute list is used, it is very important
1404 * that the extension mft records used to store the $DATA attribute of $MFT
1405 * can be reached without having to read the information contained inside
1406 * them, as this would make it impossible to find them in the first place
1407 * after the volume is dismounted. $MFT/$BITMAP probably doesn't need to
1408 * follow this rule because the bitmap is not essential for finding the mft
1409 * records, but on the other hand, handling the bitmap in this special way
1410 * would make life easier because otherwise there might be circular invocations
1411 * of functions when reading the bitmap but if we are careful, we should be
1412 * able to avoid all problems.
1413 *
1414 * FIXME: Don't forget $MftMirr, though this probably belongs in
1415 * ntfs_update_inode() (or even deeper). (AIA)
1416 *
1417 * FIXME: Want finer grained locking. (AIA)
1418 */
1419 static int ntfs_alloc_mft_record(ntfs_volume *vol, unsigned long *result)
1420 {
1421 unsigned long nr_mft_records, buf_size, buf_pos, pass_start, pass_end;
1422 unsigned long last_read_pos, mft_rec_size, bit, l;
1423 ntfs_attribute *data, *bmp;
1424 __u8 *buf, *byte, pass, b, have_allocated_mftbmp = 0;
1425 int rlen, rl_size = 0, r2len, rl2_size, old_data_rlen, err = 0;
1426 ntfs_runlist *rl, *rl2;
1427 ntfs_cluster_t lcn = 0, old_data_len;
1428 ntfs_io io;
1429 __s64 ll, old_data_allocated, old_data_initialized, old_data_size;
1430
1431 *result = -1UL;
1432 /* Allocate a buffer and setup the io structure. */
1433 buf = (__u8*)__get_free_page(GFP_NOFS);
1434 if (!buf)
1435 return -ENOMEM;
1436 lock_kernel();
1437 /* Get the $DATA and $BITMAP attributes of $MFT. */
1438 data = ntfs_find_attr(vol->mft_ino, vol->at_data, 0);
1439 bmp = ntfs_find_attr(vol->mft_ino, vol->at_bitmap, 0);
1440 if (!data || !bmp) {
1441 err = -EINVAL;
1442 goto err_ret;
1443 }
1444 /* Determine the number of allocated mft records in the mft. */
1445 pass_end = nr_mft_records = data->allocated >>
1446 vol->mft_record_size_bits;
1447 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): nr_mft_records = %lu.\n",
1448 nr_mft_records);
1449 /* Make sure we don't overflow the bitmap. */
1450 l = bmp->initialized << 3;
1451 if (l < nr_mft_records)
1452 // FIXME: It might be a good idea to extend the bitmap instead.
1453 pass_end = l;
1454 pass = 1;
1455 buf_pos = vol->mft_data_pos;
1456 if (buf_pos >= pass_end) {
1457 buf_pos = 24UL;
1458 pass = 2;
1459 }
1460 pass_start = buf_pos;
1461 rl = bmp->d.r.runlist;
1462 rlen = bmp->d.r.len - 1;
1463 lcn = rl[rlen].lcn + rl[rlen].len;
1464 io.fn_put = ntfs_put;
1465 io.fn_get = ntfs_get;
1466 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Starting bitmap search.\n");
1467 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): pass = %i, pass_start = %lu, "
1468 "pass_end = %lu.\n", pass, pass_start, pass_end);
1469 byte = NULL; // FIXME: For debugging only.
1470 /* Loop until a free mft record is found. */
1471 io.size = (nr_mft_records >> 3) & ~PAGE_MASK;
1472 for (;; io.size = PAGE_SIZE) {
1473 io.param = buf;
1474 io.do_read = 1;
1475 last_read_pos = buf_pos >> 3;
1476 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before: "
1477 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1478 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1479 bmp->size, bmp->initialized);
1480 err = ntfs_readwrite_attr(vol->mft_ino, bmp, last_read_pos,
1481 &io);
1482 if (err)
1483 goto err_ret;
1484 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Read %lu bytes.\n",
1485 (unsigned long)io.size);
1486 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After: "
1487 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1488 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1489 bmp->size, bmp->initialized);
1490 if (!io.size)
1491 goto pass_done;
1492 buf_size = io.size << 3;
1493 bit = buf_pos & 7UL;
1494 buf_pos &= ~7UL;
1495 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before loop: "
1496 "buf_size = %lu, buf_pos = %lu, bit = %lu, "
1497 "*byte = 0x%x, b = %u.\n",
1498 buf_size, buf_pos, bit, byte ? *byte : -1, b);
1499 for (; bit < buf_size && bit + buf_pos < pass_end;
1500 bit &= ~7UL, bit += 8UL) {
1501 byte = buf + (bit >> 3);
1502 if (*byte == 0xff)
1503 continue;
1504 b = ffz((unsigned long)*byte);
1505 if (b < (__u8)8 && b >= (bit & 7UL)) {
1506 bit = b + (bit & ~7UL) + buf_pos;
1507 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
1508 "Found free rec in for loop. "
1509 "bit = %lu\n", bit);
1510 goto found_free_rec;
1511 }
1512 }
1513 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After loop: "
1514 "buf_size = %lu, buf_pos = %lu, bit = %lu, "
1515 "*byte = 0x%x, b = %u.\n",
1516 buf_size, buf_pos, bit, byte ? *byte : -1, b);
1517 buf_pos += buf_size;
1518 if (buf_pos < pass_end)
1519 continue;
1520 pass_done: /* Finished with the current pass. */
1521 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At pass_done.\n");
1522 if (pass == 1) {
1523 /*
1524 * Now do pass 2, scanning the first part of the zone
1525 * we omitted in pass 1.
1526 */
1527 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Done pass "
1528 "1.\n");
1529 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Pass = 2.\n");
1530 pass = 2;
1531 pass_end = pass_start;
1532 buf_pos = pass_start = 24UL;
1533 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): pass = %i, "
1534 "pass_start = %lu, pass_end = %lu.\n",
1535 pass, pass_start, pass_end);
1536 continue;
1537 } /* pass == 2 */
1538 /* No free records left. */
1539 if (bmp->initialized << 3 > nr_mft_records &&
1540 bmp->initialized > 3) {
1541 /*
1542 * The mft bitmap is already bigger but the space is
1543 * not covered by mft records, this implies that the
1544 * next records are all free, so we already have found
1545 * a free record.
1546 */
1547 bit = nr_mft_records;
1548 if (bit < 24UL)
1549 bit = 24UL;
1550 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Found free "
1551 "record bit (#1) = 0x%lx.\n", bit);
1552 goto found_free_rec;
1553 }
1554 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Done pass 2.\n");
1555 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before: "
1556 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1557 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1558 bmp->size, bmp->initialized);
1559 /* Need to extend the mft bitmap. */
1560 if (bmp->initialized + 8LL > bmp->allocated) {
1561 ntfs_io io2;
1562
1563 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Initialized "
1564 "> allocated.\n");
1565 /* Need to extend bitmap by one more cluster. */
1566 rl = bmp->d.r.runlist;
1567 rlen = bmp->d.r.len - 1;
1568 lcn = rl[rlen].lcn + rl[rlen].len;
1569 io2.fn_put = ntfs_put;
1570 io2.fn_get = ntfs_get;
1571 io2.param = &b;
1572 io2.size = 1;
1573 io2.do_read = 1;
1574 err = ntfs_readwrite_attr(vol->bitmap, data, lcn >> 3,
1575 &io2);
1576 if (err)
1577 goto err_ret;
1578 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Read %lu "
1579 "bytes.\n", (unsigned long)io2.size);
1580 if (io2.size == 1 && b != 0xff) {
1581 __u8 tb = 1 << (lcn & (ntfs_cluster_t)7);
1582 if (!(b & tb)) {
1583 /* Next cluster is free. Allocate it. */
1584 b |= tb;
1585 io2.param = &b;
1586 io2.do_read = 0;
1587 err = ntfs_readwrite_attr(vol->bitmap,
1588 data, lcn >> 3, &io2);
1589 if (err || io.size != 1) {
1590 if (!err)
1591 err = -EIO;
1592 goto err_ret;
1593 }
1594 append_mftbmp_simple: rl[rlen].len++;
1595 have_allocated_mftbmp |= 1;
1596 ntfs_debug(DEBUG_OTHER, __FUNCTION__
1597 "(): Appending one "
1598 "cluster to mftbmp.\n");
1599 }
1600 }
1601 if (!have_allocated_mftbmp) {
1602 /* Allocate a cluster from the DATA_ZONE. */
1603 ntfs_cluster_t lcn2 = lcn;
1604 ntfs_cluster_t count = 1;
1605 err = ntfs_allocate_clusters(vol, &lcn2,
1606 &count, &rl2, &r2len,
1607 DATA_ZONE);
1608 if (err)
1609 goto err_ret;
1610 if (count != 1 || lcn2 <= 0) {
1611 if (count > 0) {
1612 rl2_dealloc_err_out: if (ntfs_deallocate_clusters(
1613 vol, rl2, r2len))
1614 ntfs_error(__FUNCTION__
1615 "(): Cluster "
1616 "deallocation in error "
1617 "code path failed! You "
1618 "should run chkdsk.\n");
1619 }
1620 ntfs_vfree(rl2);
1621 if (!err)
1622 err = -EINVAL;
1623 goto err_ret;
1624 }
1625 if (lcn2 == lcn) {
1626 ntfs_vfree(rl2);
1627 goto append_mftbmp_simple;
1628 }
1629 /* We need to append a new run. */
1630 rl_size = (rlen * sizeof(ntfs_runlist) +
1631 PAGE_SIZE - 1) & PAGE_MASK;
1632 /* Reallocate memory if necessary. */
1633 if ((rlen + 2) * sizeof(ntfs_runlist) >=
1634 rl_size) {
1635 ntfs_runlist *rlt;
1636
1637 rl_size += PAGE_SIZE;
1638 rlt = ntfs_vmalloc(rl_size);
1639 if (!rlt) {
1640 err = -ENOMEM;
1641 goto rl2_dealloc_err_out;
1642 }
1643 ntfs_memcpy(rlt, rl, rl_size -
1644 PAGE_SIZE);
1645 ntfs_vfree(rl);
1646 bmp->d.r.runlist = rl = rlt;
1647 }
1648 ntfs_vfree(rl2);
1649 rl[rlen].lcn = lcn = lcn2;
1650 rl[rlen].len = count;
1651 bmp->d.r.len = ++rlen;
1652 have_allocated_mftbmp |= 2;
1653 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
1654 "Adding run to mftbmp. "
1655 "LCN = %i, len = %i\n", lcn,
1656 count);
1657 }
1658 /*
1659 * We now have extended the mft bitmap allocated size
1660 * by one cluster. Reflect this in the attribute.
1661 */
1662 bmp->allocated += (__s64)vol->cluster_size;
1663 }
1664 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After: "
1665 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1666 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1667 bmp->size, bmp->initialized);
1668 /* We now have sufficient allocated space. */
1669 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Now have sufficient "
1670 "allocated space in mftbmp.\n");
1671 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before: "
1672 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1673 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1674 bmp->size, bmp->initialized);
1675 buf_pos = bmp->initialized;
1676 bmp->initialized += 8LL;
1677 if (bmp->initialized > bmp->size)
1678 bmp->size = bmp->initialized;
1679 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After: "
1680 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1681 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1682 bmp->size, bmp->initialized);
1683 have_allocated_mftbmp |= 4;
1684 /* Update the mft bitmap attribute value. */
1685 memset(buf, 0, 8);
1686 io.param = buf;
1687 io.size = 8;
1688 io.do_read = 0;
1689 err = ntfs_readwrite_attr(vol->mft_ino, bmp, buf_pos, &io);
1690 if (err || io.size != 8) {
1691 if (!err)
1692 err = -EIO;
1693 goto shrink_mftbmp_err_ret;
1694 }
1695 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Wrote extended "
1696 "mftbmp bytes %lu.\n", (unsigned long)io.size);
1697 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After write: "
1698 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1699 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1700 bmp->size, bmp->initialized);
1701 bit = buf_pos << 3;
1702 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Found free record "
1703 "bit (#2) = 0x%lx.\n", bit);
1704 goto found_free_rec;
1705 }
1706 found_free_rec:
1707 /* bit is the found free mft record. Allocate it in the mft bitmap. */
1708 vol->mft_data_pos = bit;
1709 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At found_free_rec.\n");
1710 io.param = buf;
1711 io.size = 1;
1712 io.do_read = 1;
1713 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before update: "
1714 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1715 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1716 bmp->size, bmp->initialized);
1717 err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io);
1718 if (err || io.size != 1) {
1719 if (!err)
1720 err = -EIO;
1721 goto shrink_mftbmp_err_ret;
1722 }
1723 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Read %lu bytes.\n",
1724 (unsigned long)io.size);
1725 #ifdef DEBUG
1726 /* Check our bit is really zero! */
1727 if (*buf & (1 << (bit & 7)))
1728 BUG();
1729 #endif
1730 *buf |= 1 << (bit & 7);
1731 io.param = buf;
1732 io.do_read = 0;
1733 err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io);
1734 if (err || io.size != 1) {
1735 if (!err)
1736 err = -EIO;
1737 goto shrink_mftbmp_err_ret;
1738 }
1739 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Wrote %lu bytes.\n",
1740 (unsigned long)io.size);
1741 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After update: "
1742 "bmp->allocated = 0x%Lx, bmp->size = 0x%Lx, "
1743 "bmp->initialized = 0x%Lx.\n", bmp->allocated,
1744 bmp->size, bmp->initialized);
1745 /* The mft bitmap is now uptodate. Deal with mft data attribute now. */
1746 ll = (__s64)(bit + 1) << vol->mft_record_size_bits;
1747 if (ll <= data->initialized) {
1748 /* The allocated record is already initialized. We are done! */
1749 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Allocated mft record "
1750 "already initialized!\n");
1751 goto done_ret;
1752 }
1753 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Allocated mft record needs "
1754 "to be initialized.\n");
1755 /* The mft record is outside the initialized data. */
1756 mft_rec_size = (unsigned long)vol->mft_record_size;
1757 /* Preserve old values for undo purposes. */
1758 old_data_allocated = data->allocated;
1759 old_data_rlen = data->d.r.len - 1;
1760 old_data_len = data->d.r.runlist[old_data_rlen].len;
1761 /*
1762 * If necessary, extend the mft until it covers the allocated record.
1763 * The loop is only actually used when a freshly formatted volume is
1764 * first written to. But it optimizes away nicely in the common case.
1765 */
1766 while (ll > data->allocated) {
1767 ntfs_cluster_t lcn2, nr_lcn2, nr, min_nr;
1768
1769 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Extending mft "
1770 "data allocation, data->allocated = 0x%Lx, "
1771 "data->size = 0x%Lx, data->initialized = "
1772 "0x%Lx.\n", data->allocated, data->size,
1773 data->initialized);
1774 /* Minimum allocation is one mft record worth of clusters. */
1775 if (mft_rec_size <= vol->cluster_size)
1776 min_nr = (ntfs_cluster_t)1;
1777 else
1778 min_nr = mft_rec_size >> vol->cluster_size_bits;
1779 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): min_nr = %i.\n",
1780 min_nr);
1781 /* Allocate 16 mft records worth of clusters. */
1782 nr = mft_rec_size << 4 >> vol->cluster_size_bits;
1783 if (!nr)
1784 nr = (ntfs_cluster_t)1;
1785 /* Determine the preferred allocation location. */
1786 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): nr = %i.\n", nr);
1787 rl2 = data->d.r.runlist;
1788 r2len = data->d.r.len;
1789 lcn2 = rl2[r2len - 1].lcn + rl2[r2len - 1].len;
1790 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): rl2[r2len - 1].lcn "
1791 "= %i, .len = %i.\n", rl2[r2len - 1].lcn,
1792 rl2[r2len - 1].len);
1793 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): lcn2 = %i, r2len = "
1794 "%i.\n", lcn2, r2len);
1795 retry_mft_data_allocation:
1796 nr_lcn2 = nr;
1797 err = ntfs_allocate_clusters(vol, &lcn2, &nr_lcn2, &rl2,
1798 &r2len, MFT_ZONE);
1799 #ifdef DEBUG
1800 if (!err && nr_lcn2 < min_nr)
1801 /* Allocated less than minimum needed. Weird! */
1802 BUG();
1803 #endif
1804 if (err) {
1805 /*
1806 * If there isn't enough space to do the wanted
1807 * allocation, but there is enough space to do a
1808 * minimal allocation, then try that, unless the wanted
1809 * allocation was already the minimal allocation.
1810 */
1811 if (err == -ENOSPC && nr > min_nr &&
1812 nr_lcn2 >= min_nr) {
1813 nr = min_nr;
1814 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
1815 "Retrying mft data "
1816 "allocation, nr = min_nr = %i"
1817 ".\n", nr);
1818 goto retry_mft_data_allocation;
1819 }
1820 goto undo_mftbmp_alloc_err_ret;
1821 }
1822 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Allocated %i "
1823 "clusters starting at LCN %i.\n", nr_lcn2,
1824 lcn2);
1825 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Allocated "
1826 "runlist:\n");
1827 dump_runlist(rl2, r2len);
1828 /* Append rl2 to the mft data attribute's run list. */
1829 err = splice_runlists(&data->d.r.runlist, (int*)&data->d.r.len,
1830 rl2, r2len);
1831 if (err) {
1832 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
1833 "splice_runlists failed with error "
1834 "code %i.\n", -err);
1835 goto undo_partial_data_alloc_err_ret;
1836 }
1837 /* Reflect the allocated clusters in the mft allocated data. */
1838 data->allocated += nr_lcn2 << vol->cluster_size_bits;
1839 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After extending mft "
1840 "data allocation, data->allocated = 0x%Lx, "
1841 "data->size = 0x%Lx, data->initialized = "
1842 "0x%Lx.\n", data->allocated, data->size,
1843 data->initialized);
1844 }
1845 /* Prepare a formatted (empty) mft record. */
1846 memset(buf, 0, mft_rec_size);
1847 ntfs_fill_mft_header(buf, mft_rec_size, 0, 0, 0);
1848 err = ntfs_insert_fixups(buf, mft_rec_size);
1849 if (err)
1850 goto undo_data_alloc_err_ret;
1851 /*
1852 * Extend mft data initialized size to reach the allocated mft record
1853 * and write the formatted mft record buffer to each mft record being
1854 * initialized. Note, that ntfs_readwrite_attr extends both
1855 * data->initialized and data->size, so no need for us to touch them.
1856 */
1857 old_data_initialized = data->initialized;
1858 old_data_size = data->size;
1859 while (ll > data->initialized) {
1860 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Initializing mft "
1861 "record 0x%Lx.\n",
1862 data->initialized >> vol->mft_record_size_bits);
1863 io.param = buf;
1864 io.size = mft_rec_size;
1865 io.do_read = 0;
1866 err = ntfs_readwrite_attr(vol->mft_ino, data,
1867 data->initialized, &io);
1868 if (err || io.size != mft_rec_size) {
1869 if (!err)
1870 err = -EIO;
1871 goto undo_data_init_err_ret;
1872 }
1873 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Wrote %i bytes to "
1874 "mft data.\n", io.size);
1875 }
1876 /* Update the VFS inode size as well. */
1877 VFS_I(vol->mft_ino)->i_size = data->size;
1878 #ifdef DEBUG
1879 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After mft record "
1880 "initialization: data->allocated = 0x%Lx, data->size "
1881 "= 0x%Lx, data->initialized = 0x%Lx.\n",
1882 data->allocated, data->size, data->initialized);
1883 /* Sanity checks. */
1884 if (data->size > data->allocated || data->size < data->initialized ||
1885 data->initialized > data->allocated)
1886 BUG();
1887 #endif
1888 done_ret:
1889 /* Return the number of the allocated mft record. */
1890 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At done_ret. *result = bit = "
1891 "0x%lx.\n", bit);
1892 *result = bit;
1893 vol->mft_data_pos = bit + 1;
1894 err_ret:
1895 unlock_kernel();
1896 free_page((unsigned long)buf);
1897 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Syncing inode $MFT.\n");
1898 if (ntfs_update_inode(vol->mft_ino))
1899 ntfs_error(__FUNCTION__ "(): Failed to sync inode $MFT. "
1900 "Continuing anyway.\n");
1901 if (!err) {
1902 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): Done. Allocated mft "
1903 "record number *result = 0x%lx.\n", *result);
1904 return 0;
1905 }
1906 if (err != -ENOSPC)
1907 ntfs_error(__FUNCTION__ "(): Failed to allocate an mft "
1908 "record. Returning error code %i.\n", -err);
1909 else
1910 ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): Failed to allocate "
1911 "an mft record due to lack of free space.\n");
1912 return err;
1913 undo_data_init_err_ret:
1914 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At "
1915 "undo_data_init_err_ret.\n");
1916 data->initialized = old_data_initialized;
1917 data->size = old_data_size;
1918 undo_data_alloc_err_ret:
1919 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At undo_data_alloc_err_ret."
1920 "\n");
1921 data->allocated = old_data_allocated;
1922 undo_partial_data_alloc_err_ret:
1923 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At "
1924 "undo_partial_data_alloc_err_ret.\n");
1925 /* Deallocate the clusters. */
1926 if (ntfs_deallocate_clusters(vol, rl2, r2len))
1927 ntfs_error(__FUNCTION__ "(): Error deallocating clusters in "
1928 "error code path. You should run chkdsk.\n");
1929 ntfs_vfree(rl2);
1930 /* Revert the run list back to what it was before. */
1931 r2len = data->d.r.len;
1932 rl2 = data->d.r.runlist;
1933 rl2[old_data_rlen++].len = old_data_len;
1934 rl2[old_data_rlen].lcn = (ntfs_cluster_t)-1;
1935 rl2[old_data_rlen].len = (ntfs_cluster_t)0;
1936 data->d.r.len = old_data_rlen;
1937 rl2_size = ((old_data_rlen + 1) * sizeof(ntfs_runlist) + PAGE_SIZE -
1938 1) & PAGE_MASK;
1939 /* Reallocate memory freeing any extra memory allocated. */
1940 if (rl2_size < ((r2len * sizeof(ntfs_runlist) + PAGE_SIZE - 1) &
1941 PAGE_MASK)) {
1942 rl2 = ntfs_vmalloc(rl2_size);
1943 if (rl2) {
1944 ntfs_memcpy(rl2, data->d.r.runlist, rl2_size);
1945 ntfs_vfree(data->d.r.runlist);
1946 data->d.r.runlist = rl2;
1947 } else
1948 ntfs_error(__FUNCTION__ "(): Error reallocating "
1949 "memory in error code path. This "
1950 "should be harmless.\n");
1951 }
1952 undo_mftbmp_alloc_err_ret:
1953 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At "
1954 "undo_mftbmp_alloc_err_ret.\n");
1955 /* Deallocate the allocated bit in the mft bitmap. */
1956 io.param = buf;
1957 io.size = 1;
1958 io.do_read = 1;
1959 err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io);
1960 if (!err && io.size == 1) {
1961 *buf &= ~(1 << (bit & 7));
1962 io.param = buf;
1963 io.do_read = 0;
1964 err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io);
1965 }
1966 if (err || io.size != 1) {
1967 if (!err)
1968 err = -EIO;
1969 ntfs_error(__FUNCTION__ "(): Error deallocating mft record in "
1970 "error code path. You should run chkdsk.\n");
1971 }
1972 shrink_mftbmp_err_ret:
1973 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At shrink_mftbmp_err_ret.\n");
1974 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): have_allocated_mftbmp = "
1975 "%i.\n", have_allocated_mftbmp);
1976 if (!have_allocated_mftbmp)
1977 goto err_ret;
1978 /* Shrink the mftbmp back to previous size. */
1979 if (bmp->size == bmp->initialized)
1980 bmp->size -= 8LL;
1981 bmp->initialized -= 8LL;
1982 have_allocated_mftbmp &= ~4;
1983 /* If no allocation occured then we are done. */
1984 ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): have_allocated_mftbmp = "
1985 "%i.\n", have_allocated_mftbmp);
1986 if (!have_allocated_mftbmp)
1987 goto err_ret;
1988 /* Deallocate the allocated cluster. */
1989 bmp->allocated -= (__s64)vol->cluster_size;
1990 if (ntfs_deallocate_cluster_run(vol, lcn, (ntfs_cluster_t)1))
1991 ntfs_error(__FUNCTION__ "(): Error deallocating cluster in "
1992 "error code path. You should run chkdsk.\n");
1993 switch (have_allocated_mftbmp & 3) {
1994 case 1:
1995 /* Delete the last lcn from the last run of mftbmp. */
1996 rl[rlen - 1].len--;
1997 break;
1998 case 2:
1999 /* Delete the last run of mftbmp. */
2000 bmp->d.r.len = --rlen;
2001 /* Reallocate memory if necessary. */
2002 if ((rlen + 1) * sizeof(ntfs_runlist) <= rl_size - PAGE_SIZE) {
2003 ntfs_runlist *rlt;
2004
2005 rl_size -= PAGE_SIZE;
2006 rlt = ntfs_vmalloc(rl_size);
2007 if (rlt) {
2008 ntfs_memcpy(rlt, rl, rl_size);
2009 ntfs_vfree(rl);
2010 bmp->d.r.runlist = rl = rlt;
2011 } else
2012 ntfs_error(__FUNCTION__ "(): Error "
2013 "reallocating memory in error "
2014 "code path. This should be "
2015 "harmless.\n");
2016 }
2017 bmp->d.r.runlist[bmp->d.r.len].lcn = (ntfs_cluster_t)-1;
2018 bmp->d.r.runlist[bmp->d.r.len].len = (ntfs_cluster_t)0;
2019 break;
2020 default:
2021 BUG();
2022 }
2023 goto err_ret;
2024 }
2025
2026 /* We need 0x48 bytes in total. */
2027 static int add_standard_information(ntfs_inode *ino)
2028 {
2029 ntfs_time64_t now;
2030 char data[0x30];
2031 char *position = data;
2032 ntfs_attribute *si;
2033
2034 now = ntfs_now();
2035 NTFS_PUTU64(position + 0x00, now); /* File creation */
2036 NTFS_PUTU64(position + 0x08, now); /* Last modification */
2037 NTFS_PUTU64(position + 0x10, now); /* Last mod for MFT */
2038 NTFS_PUTU64(position + 0x18, now); /* Last access */
2039 NTFS_PUTU64(position + 0x20, 0); /* MSDOS file perms */
2040 NTFS_PUTU64(position + 0x28, 0); /* unknown */
2041 return ntfs_create_attr(ino, ino->vol->at_standard_information, 0,
2042 data, sizeof(data), &si);
2043 }
2044
2045 static int add_filename(ntfs_inode *ino, ntfs_inode *dir,
2046 const unsigned char *filename, int length, ntfs_u32 flags)
2047 {
2048 unsigned char *position;
2049 unsigned int size;
2050 ntfs_time64_t now;
2051 int count, error;
2052 unsigned char* data;
2053 ntfs_attribute *fn;
2054
2055 /* Work out the size. */
2056 size = 0x42 + 2 * length;
2057 data = ntfs_malloc(size);
2058 if (!data)
2059 return -ENOMEM;
2060 /* Search for a position. */
2061 position = data;
2062 NTFS_PUTINUM(position, dir); /* Inode num of dir */
2063 now = ntfs_now();
2064 NTFS_PUTU64(position + 0x08, now); /* File creation */
2065 NTFS_PUTU64(position + 0x10, now); /* Last modification */
2066 NTFS_PUTU64(position + 0x18, now); /* Last mod for MFT */
2067 NTFS_PUTU64(position + 0x20, now); /* Last access */
2068 /* FIXME: Get the following two sizes by finding the data attribute
2069 * in ino->attr and copying the corresponding fields from there.
2070 * If no data present then set to zero. In current implementation
2071 * add_data is called after add_filename so zero is correct on
2072 * creation. Need to change when we have hard links / support different
2073 * filename namespaces. (AIA) */
2074 NTFS_PUTS64(position + 0x28, 0); /* Allocated size */
2075 NTFS_PUTS64(position + 0x30, 0); /* Data size */
2076 NTFS_PUTU32(position + 0x38, flags); /* File flags */
2077 NTFS_PUTU32(position + 0x3c, 0); /* We don't use these
2078 * features yet. */
2079 NTFS_PUTU8(position + 0x40, length); /* Filename length */
2080 NTFS_PUTU8(position + 0x41, 0); /* Only long name */
2081 /* FIXME: This is madness. We are defining the POSIX namespace
2082 * for the filename here which can mean that the file will be
2083 * invisible when in Windows NT/2k! )-: (AIA) */
2084 position += 0x42;
2085 for (count = 0; count < length; count++) {
2086 NTFS_PUTU16(position + 2 * count, filename[count]);
2087 }
2088 error = ntfs_create_attr(ino, ino->vol->at_file_name, 0, data, size,
2089 &fn);
2090 if (!error)
2091 error = ntfs_dir_add(dir, ino, fn);
2092 ntfs_free(data);
2093 return error;
2094 }
2095
2096 int add_security(ntfs_inode* ino, ntfs_inode* dir)
2097 {
2098 int error;
2099 char *buf;
2100 int size;
2101 ntfs_attribute* attr;
2102 ntfs_io io;
2103 ntfs_attribute *se;
2104
2105 attr = ntfs_find_attr(dir, ino->vol->at_security_descriptor, 0);
2106 if (!attr)
2107 return -EOPNOTSUPP; /* Need security in directory. */
2108 size = attr->size;
2109 if (size > 512)
2110 return -EOPNOTSUPP;
2111 buf = ntfs_malloc(size);
2112 if (!buf)
2113 return -ENOMEM;
2114 io.fn_get = ntfs_get;
2115 io.fn_put = ntfs_put;
2116 io.param = buf;
2117 io.size = size;
2118 error = ntfs_read_attr(dir, ino->vol->at_security_descriptor, 0, 0,&io);
2119 if (!error && io.size != size)
2120 ntfs_error("wrong size in add_security\n");
2121 if (error) {
2122 ntfs_free(buf);
2123 return error;
2124 }
2125 /* FIXME: Consider ACL inheritance. */
2126 error = ntfs_create_attr(ino, ino->vol->at_security_descriptor,
2127 0, buf, size, &se);
2128 ntfs_free(buf);
2129 return error;
2130 }
2131
2132 static int add_data(ntfs_inode* ino, unsigned char *data, int length)
2133 {
2134 ntfs_attribute *da;
2135
2136 return ntfs_create_attr(ino, ino->vol->at_data, 0, data, length, &da);
2137 }
2138
2139 /*
2140 * We _could_ use 'dir' to help optimise inode allocation.
2141 *
2142 * FIXME: Need to undo what we do in ntfs_alloc_mft_record if we get an error
2143 * further on in ntfs_alloc_inode. Either fold the two functions to allow
2144 * proper undo or just deallocate the record from the mft bitmap. (AIA)
2145 */
2146 int ntfs_alloc_inode(ntfs_inode *dir, ntfs_inode *result, const char *filename,
2147 int namelen, ntfs_u32 flags)
2148 {
2149 ntfs_volume *vol = dir->vol;
2150 int err;
2151 ntfs_u8 buffer[2];
2152 ntfs_io io;
2153
2154 err = ntfs_alloc_mft_record(vol, &(result->i_number));
2155 if (err) {
2156 if (err == -ENOSPC)
2157 ntfs_error(__FUNCTION__ "(): No free inodes.\n");
2158 return err;
2159 }
2160 /* Get the sequence number. */
2161 io.fn_put = ntfs_put;
2162 io.fn_get = ntfs_get;
2163 io.param = buffer;
2164 io.size = 2;
2165 err = ntfs_read_attr(vol->mft_ino, vol->at_data, 0,
2166 ((__s64)result->i_number << vol->mft_record_size_bits)
2167 + 0x10, &io);
2168 // FIXME: We are leaving the MFT in inconsistent state! (AIA)
2169 if (err)
2170 return err;
2171 /* Increment the sequence number skipping zero. */
2172 result->sequence_number = (NTFS_GETU16(buffer) + 1) & 0xffff;
2173 if (!result->sequence_number)
2174 result->sequence_number++;
2175 result->vol = vol;
2176 result->attr_count = 0;
2177 result->attrs = 0;
2178 result->record_count = 1;
2179 result->records = ntfs_calloc(8 * sizeof(int));
2180 if (!result->records)
2181 goto mem_err_out;
2182 result->records[0] = result->i_number;
2183 result->attr = ntfs_calloc(vol->mft_record_size);
2184 if (!result->attr) {
2185 ntfs_free(result->records);
2186 result->records = NULL;
2187 goto mem_err_out;
2188 }
2189 ntfs_fill_mft_header(result->attr, vol->mft_record_size,
2190 result->sequence_number, 1, 1);
2191 err = add_standard_information(result);
2192 if (!err)
2193 err = add_filename(result, dir, filename, namelen, flags);
2194 if (!err)
2195 err = add_security(result, dir);
2196 // FIXME: We are leaving the MFT in inconsistent state on error! (AIA)
2197 return err;
2198 mem_err_out:
2199 // FIXME: We are leaving the MFT in inconsistent state! (AIA)
2200 result->record_count = 0;
2201 result->attr = NULL;
2202 return -ENOMEM;
2203 }
2204
2205 int ntfs_alloc_file(ntfs_inode *dir, ntfs_inode *result, char *filename,
2206 int namelen)
2207 {
2208 int err;
2209
2210 err = ntfs_alloc_inode(dir, result, filename, namelen, 0);
2211 if (!err)
2212 err = add_data(result, 0, 0);
2213 return err;
2214 }
2215
2216