File: /usr/src/linux/include/linux/reiserfs_fs_sb.h
1 /* Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing
2 * and copyright details */
3
4 #ifndef _LINUX_REISER_FS_SB
5 #define _LINUX_REISER_FS_SB
6
7 #ifdef __KERNEL__
8 #include <linux/tqueue.h>
9 #endif
10
11 //
12 // super block's field values
13 //
14 /*#define REISERFS_VERSION 0 undistributed bitmap */
15 /*#define REISERFS_VERSION 1 distributed bitmap and resizer*/
16 #define REISERFS_VERSION_2 2 /* distributed bitmap, resizer, 64-bit, etc; old_format_only() compares SB_VERSION(s) against this */
17 #define UNSET_HASH 0 // read_super will guess which hash the names
18 // in directories were sorted with
19 #define TEA_HASH 1
20 #define YURA_HASH 2
21 #define R5_HASH 3
22 #define DEFAULT_HASH R5_HASH /* alias for R5_HASH */
23
24 /* this is the on disk super block; declared packed, so the field order and widths below are the layout */
25
26 struct reiserfs_super_block
27 {
28 __u32 s_block_count; /* blocks count */
29 __u32 s_free_blocks; /* free blocks count */
30 __u32 s_root_block; /* root block number */
31 __u32 s_journal_block; /* journal block number */
32 __u32 s_journal_dev; /* journal device number */
33
34 /* Since journal size is currently a #define in a header file, if
35 ** someone creates a disk with a 16MB journal and moves it to a
36 ** system with 32MB journal default, they will overflow their journal
37 ** when they mount the disk. s_orig_journal_size, plus some checks
38 ** while mounting (inside journal_init) prevent that from happening
39 */
40
41 /* great comment Chris. Thanks. -Hans */
42
43 __u32 s_orig_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
44 __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */
45 __u32 s_journal_block_count ; /* total size of the journal. can change over time */
46 __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */
47 __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */
48 __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */
49 __u16 s_blocksize; /* block size */
50 __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */
51 __u16 s_oid_cursize; /* current size of object id array */
52 __u16 s_state; /* valid or error */
53 char s_magic[12]; /* reiserfs magic string indicates that file system is reiserfs (struct reiserfs_super_block_v1 has 16 bytes here) */
54 __u32 s_hash_function_code; /* indicates what hash function is being used to sort names in a directory */
55 __u16 s_tree_height; /* height of disk tree */
56 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
57 __u16 s_version; /* read through SB_VERSION(). I'd prefer it if this was a string,
58 something like "3.6.4", and maybe
59 16 bytes long mostly unused. We
60 don't need to save bytes in the
61 superblock. -Hans */
62 __u16 s_reserved;
63 __u32 s_inode_generation;
64 char s_unused[124] ; /* zero filled by mkreiserfs */
65 } __attribute__ ((__packed__));
66
67 #define SB_SIZE (sizeof(struct reiserfs_super_block)) /* size in bytes of the packed struct above */
68
69 /* this is the super from 3.5.X, where X >= 10. Same fields as struct reiserfs_super_block up to s_state; after that this layout has a 16 byte magic and no hash code, version, inode generation or unused tail */
70 struct reiserfs_super_block_v1
71 {
72 __u32 s_block_count; /* blocks count */
73 __u32 s_free_blocks; /* free blocks count */
74 __u32 s_root_block; /* root block number */
75 __u32 s_journal_block; /* journal block number */
76 __u32 s_journal_dev; /* journal device number */
77 __u32 s_orig_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
78 __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */
79 __u32 s_journal_block_count ; /* total size of the journal. can change over time */
80 __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */
81 __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */
82 __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */
83 __u16 s_blocksize; /* block size */
84 __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */
85 __u16 s_oid_cursize; /* current size of object id array */
86 __u16 s_state; /* valid or error */
87 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
88 __u16 s_tree_height; /* height of disk tree */
89 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
90 __u32 s_reserved; /* one 32-bit word here; the newer layout has 16-bit s_version and s_reserved in this position */
91 } __attribute__ ((__packed__));
92
93 #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) /* size in bytes of the packed 3.5.X struct above */
94
95 /* LOGGING -- */
96
97 /* These all interrelate for performance.
98 **
99 ** If the journal block count is smaller than n transactions, you lose speed.
100 ** I don't know what n is yet, I'm guessing 8-16.
101 **
102 ** typical transaction size depends on the application, how often fsync is
103 ** called, and how many metadata blocks you dirty in a 30 second period.
104 ** The more small files (<16k) you use, the larger your transactions will
105 ** be.
106 **
107 ** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
108 ** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough
109 ** to prevent wrapping before dirty meta blocks get to disk.
110 **
111 ** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
112 ** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
113 **
114 ** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
115 **
116 */
117
118 /* compile-time journal tunables. don't mess with these for a while */
119 /* we have a node size define somewhere in reiserfs_fs.h. -Hans */
120 #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
121 #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
122 #define JOURNAL_TRANS_MAX 1024 /* biggest possible single transaction, don't change for now (8/3/99) */
123 #define JOURNAL_HASH_SIZE 8192 /* number of slots in j_hash_table[] and j_list_hash_table[] of struct reiserfs_journal */
124 #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2. Sizes j_list_bitmap[] */
125 #define JOURNAL_LIST_COUNT 64 /* number of entries in j_journal_list[] of struct reiserfs_journal */
126
127 /* these are bh_state bit flag offset numbers (bit positions, not masks), for use in the buffer head */
128
129 #define BH_JDirty 16 /* journal data needs to be written before buffer can be marked dirty */
130 #define BH_JDirty_wait 18 /* commit is done, buffer marked dirty */
131 #define BH_JNew 19 /* buffer allocated during this transaction, no need to write if freed during this trans too */
132
133 /* ugly. metadata blocks must be prepared before they can be logged.
134 ** prepared means unlocked and cleaned. If the block is prepared, but not
135 ** logged for some reason, any bits cleared while preparing it must be
136 ** set again. The two bits below record that state.
137 */
138 #define BH_JPrepared 20 /* block has been prepared for the log */
139 #define BH_JRestore_dirty 22 /* restore the dirty bit later */
140
141 /* One of these for every block in every transaction.
142 ** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a
143 ** hash of all the in memory transactions.
144 ** next and prev are used by the current transaction (journal_hash).
145 ** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash
146 ** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging
147 ** to a given transaction.
148 */
149 struct reiserfs_journal_cnode {
150 struct buffer_head *bh ; /* real buffer head */
151 kdev_t dev ; /* dev of real buffer head */
152 unsigned long blocknr ; /* block number of real buffer head, == 0 when buffer on disk */
153 int state ;
154 struct reiserfs_journal_list *jlist ; /* journal list this cnode lives in (struct is defined further down in this header) */
155 struct reiserfs_journal_cnode *next ; /* next in transaction list */
156 struct reiserfs_journal_cnode *prev ; /* prev in transaction list */
157 struct reiserfs_journal_cnode *hprev ; /* prev in hash list */
158 struct reiserfs_journal_cnode *hnext ; /* next in hash list */
159 };
160 /* one bitmap node; struct reiserfs_list_bitmap holds an array of pointers to these */
161 struct reiserfs_bitmap_node {
162 int id ;
163 char *data ; /* NOTE(review): presumably the bitmap bytes themselves -- confirm against the journal code */
164 struct list_head list ; /* list linkage; presumably chained on j_bitmap_nodes of struct reiserfs_journal -- confirm */
165 } ;
166 /* pairs a journal list with a set of bitmap nodes; struct reiserfs_journal keeps JOURNAL_NUM_BITMAPS of these in j_list_bitmap[] */
167 struct reiserfs_list_bitmap {
168 struct reiserfs_journal_list *journal_list ; /* the journal list involved; its j_list_bitmap member is a pointer of this struct's type */
169 struct reiserfs_bitmap_node **bitmaps ; /* array of pointers to bitmap nodes; its length is not visible in this header */
170 } ;
171
172 /*
173 ** transaction handle which is passed around for all journal calls (e.g. first argument of journal_mark_dirty())
174 */
175 struct reiserfs_transaction_handle {
176 /* ifdef it. -Hans (refers to t_caller, which is debugging only) */
177 char *t_caller ; /* debugging use */
178 int t_blocks_logged ; /* number of blocks this writer has logged */
179 int t_blocks_allocated ; /* number of blocks this writer allocated */
180 unsigned long t_trans_id ; /* sanity check, equals the current trans id */
181 struct super_block *t_super ; /* super for this FS when journal_begin was
182 called. saves calls to reiserfs_get_super */
183
184 } ;
185
186 /*
187 ** one of these for each transaction. The most important part here is the j_realblock.
188 ** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
189 ** real buffer heads dirty once all the commits hit the disk,
190 ** and to make sure every real block in a transaction is on disk before allowing the log area
191 ** to be overwritten. struct reiserfs_journal embeds JOURNAL_LIST_COUNT of these in j_journal_list[]. */
192 struct reiserfs_journal_list {
193 unsigned long j_start ; /* NOTE(review): presumably where this transaction starts in the log -- confirm */
194 unsigned long j_len ; /* NOTE(review): presumably the length of this transaction -- confirm */
195 atomic_t j_nonzerolen ;
196 atomic_t j_commit_left ;
197 atomic_t j_flushing ;
198 atomic_t j_commit_flushing ;
199 atomic_t j_older_commits_done ; /* all commits older than this on disk*/
200 unsigned long j_trans_id ;
201 time_t j_timestamp ;
202 struct reiserfs_list_bitmap *j_list_bitmap ; /* list bitmap associated with this transaction */
203 struct buffer_head *j_commit_bh ; /* commit buffer head */
204 struct reiserfs_journal_cnode *j_realblock ; /* cnodes for the real blocks of this transaction (see comment above) */
205 struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */
206 wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */
207 wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */
208 } ;
209
210 struct reiserfs_page_list ; /* defined in reiserfs_fs.h; only a pointer to it is needed here */
211 /* in-memory journal state; reached from the super block through SB_JOURNAL(s) */
212 struct reiserfs_journal {
213 struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */
214 struct reiserfs_journal_cnode *j_last ; /* newest journal block */
215 struct reiserfs_journal_cnode *j_first ; /* oldest journal block. start here for traverse */
216
217 int j_state ;
218 unsigned long j_trans_id ;
219 unsigned long j_mount_id ;
220 unsigned long j_start ; /* start of current waiting commit (index into j_ap_blocks) */
221 unsigned long j_len ; /* length of current waiting commit */
222 unsigned long j_len_alloc ; /* number of buffers requested by journal_begin() */
223 atomic_t j_wcount ; /* count of writers for current commit */
224 unsigned long j_bcount ; /* batch count. allows turning X transactions into 1 */
225 unsigned long j_first_unflushed_offset ; /* first unflushed transactions offset */
226 unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */
227 struct buffer_head *j_header_bh ;
228
229 /* j_flush_pages must be flushed before the current transaction can
230 ** commit
231 */
232 struct reiserfs_page_list *j_flush_pages ;
233 time_t j_trans_start_time ; /* time this transaction started */
234 wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */
235 atomic_t j_wlock ; /* lock for j_wait */
236 wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */
237 atomic_t j_jlock ; /* lock for j_join_wait */
238 int j_journal_list_index ; /* journal list number of the current trans; read via SB_JOURNAL_LIST_INDEX(s) */
239 int j_list_bitmap_index ; /* number of next list bitmap to use */
240 int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */
241 int j_next_full_flush ; /* next journal_end will flush all journal list */
242 int j_next_async_flush ; /* next journal_end will flush all async commits */
243
244 int j_cnode_used ; /* number of cnodes on the used list */
245 int j_cnode_free ; /* number of cnodes on the free list */
246
247 struct reiserfs_journal_cnode *j_cnode_free_list ;
248 struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */
249
250 int j_free_bitmap_nodes ;
251 int j_used_bitmap_nodes ;
252 struct list_head j_bitmap_nodes ;
253 struct inode j_dummy_inode ;
254 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */
255 struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists; read via SB_JOURNAL_LIST(s) */
256 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */
257 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all
258 the transactions */
259 struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */
260 };
261
262 #define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */
263
264 /* name hash function type: takes a const char pointer and an int, returns a __u32. Stored in s_hash_function of struct reiserfs_sb_info */
265 typedef __u32 (*hashf_t) (const char *, int);
266
267 /* reiserfs union of in-core super block data; the macros in this header reach it as (s)->u.reiserfs_sb */
268 struct reiserfs_sb_info
269 {
270 struct buffer_head * s_sbh; /* Buffer containing the super block; read via SB_BUFFER_WITH_SB(s) */
271 /* both the comment and the choice of
272 name are unclear for s_rs -Hans */
273 struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer; read via SB_DISK_SUPER_BLOCK(s) */
274 struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap; read via SB_AP_BITMAP(s) */
275 struct reiserfs_journal *s_journal ; /* pointer to journal information; read via SB_JOURNAL(s) */
276 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
277
278 /* Comment? -Hans */
279 void (*end_io_handler)(struct buffer_head *, int);
280 hashf_t s_hash_function; /* pointer to function which is used
281 to sort names in directory. Set on
282 mount */
283 unsigned long s_mount_opt; /* reiserfs's mount options are set
284 here (currently - NOTAIL, NOLOG,
285 REPLAYONLY). Tested as (1 << option number) by the option macros in this header */
286
287 /* Comment? -Hans */
288 wait_queue_head_t s_wait;
289 /* To be obsoleted soon by per buffer seals.. -Hans */
290 atomic_t s_generation_counter; // increased by one every time the
291 // tree gets re-balanced
292
293 /* session statistics (plain int counters; this header only names them) */
294 int s_kmallocs;
295 int s_disk_reads;
296 int s_disk_writes;
297 int s_fix_nodes;
298 int s_do_balance;
299 int s_unneeded_left_neighbor;
300 int s_good_search_by_key_reada;
301 int s_bmaps;
302 int s_bmaps_without_search;
303 int s_direct2indirect;
304 int s_indirect2direct;
305 };
306
307
308 #define NOTAIL 0 /* -o notail: no tails will be created in a session. Like the defines below, this is a bit number in s_mount_opt */
309 #define REPLAYONLY 3 /* replay journal and return 0. Used by fsck */
310 #define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */
311 #define REISERFS_CONVERT 5 /* -o conv: causes conversion of old
312 format super block to the new
313 format. If not specified - old
314 partition will be dealt with in a
315 manner of 3.5.x (see old_format_only()) */
316
317 /* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
318 ** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option
319 ** is not required. If the normal autodetection code can't determine which
320 ** hash to use (because both hashes had the same value for a file)
321 ** use this option to force a specific hash. It won't allow you to override
322 ** the existing hash on the FS, so if you have a tea hash disk, and mount
323 ** with -o hash=rupasov, the mount will fail.
324 */
325 #define FORCE_TEA_HASH 6 /* try to force tea hash on mount */
326 #define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */
327 #define FORCE_R5_HASH 8 /* try to force r5 hash on mount */
328 #define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */
329
330
331 /* used for testing experimental features, makes benchmarking new
332 features with and without more convenient, should never be used by
333 users in any code shipped to users (ideally). The two groups below name the same four bit numbers. */
334
335 #define REISERFS_NO_BORDER 11
336 #define REISERFS_NO_UNHASHED_RELOCATION 12
337 #define REISERFS_HASHED_RELOCATION 13
338 #define REISERFS_TEST4 14
339
340 #define REISERFS_TEST1 11 /* same bit as REISERFS_NO_BORDER */
341 #define REISERFS_TEST2 12 /* same bit as REISERFS_NO_UNHASHED_RELOCATION */
342 #define REISERFS_TEST3 13 /* same bit as REISERFS_HASHED_RELOCATION */
343 #define REISERFS_TEST4 14 /* second definition of REISERFS_TEST4, identical to the one above */
344 /* mount option tests on a struct super_block pointer s: each expands to s_mount_opt masked with (1 << option number), so the result is nonzero (not necessarily 1) when the option is set */
345 #define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH)) /* FORCE_R5_HASH set */
346 #define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH)) /* FORCE_RUPASOV_HASH set */
347 #define reiserfs_tea_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_TEA_HASH)) /* FORCE_TEA_HASH set */
348 #define reiserfs_hash_detect(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_HASH_DETECT)) /* FORCE_HASH_DETECT set */
349 #define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER)) /* experimental option bit 11 */
350 #define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) /* experimental option bit 12 */
351 #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) /* experimental option bit 13 */
352 #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4)) /* experimental option bit 14 */
353
354 #define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL)) /* -o notail given */
355 #define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REPLAYONLY)) /* REPLAYONLY set */
356 #define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG)) /* -o nolog given */
357 #define old_format_only(s) ((SB_VERSION(s) != REISERFS_VERSION_2) && !((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_CONVERT))) /* 1 when the on-disk version is not REISERFS_VERSION_2 and -o conv was not given, else 0 */
358
359 /* prototypes only: the definitions are outside this header, so behaviour notes below are limited to what the signatures show */
360 void reiserfs_file_buffer (struct buffer_head * bh, int list);
361 int reiserfs_is_super(struct super_block *s) ;
362 int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; /* takes the transaction handle, the super block and the buffer head */
363 int flush_old_commits(struct super_block *s, int) ;
364 int show_reiserfs_locks(void) ;
365 int reiserfs_resize(struct super_block *, unsigned long) ; /* NOTE(review): the unsigned long is presumably the new size -- confirm against the definition */
366 /* NOTE(review): CARRY_ON / SCHEDULE_OCCURRED look like a pair of status codes; their users are not visible in this header */
367 #define CARRY_ON 0
368 #define SCHEDULE_OCCURRED 1
369
370 /* accessors for the in-core reiserfs data of a struct super_block pointer s */
371 #define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh) /* buffer head containing the super block */
372 #define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal) /* struct reiserfs_journal pointer */
373 #define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list) /* the j_journal_list[] array */
374 #define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) /* journal list number of the current trans */
375 #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) /* NOTE(review): struct reiserfs_journal as declared in this header has no j_journal_len_free member, so any use of this macro would not compile */
376 #define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap) /* array of bitmap buffer heads */
377
378
379 // on-disk super block fields converted to cpu form (le32_to_cpu/le16_to_cpu chosen to match each field's width)
380 #define SB_DISK_SUPER_BLOCK(s) ((s)->u.reiserfs_sb.s_rs) /* pointer to the on-disk super block, no conversion */
381 #define SB_BLOCK_COUNT(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_block_count))
382 #define SB_FREE_BLOCKS(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_free_blocks))
383 #define SB_REISERFS_MAGIC(s) (SB_DISK_SUPER_BLOCK(s)->s_magic) /* the char array itself, no conversion */
384 #define SB_ROOT_BLOCK(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_root_block))
385 #define SB_TREE_HEIGHT(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_tree_height))
386 #define SB_REISERFS_STATE(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_state))
387 #define SB_VERSION(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_version))
388 #define SB_BMAP_NR(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_bmap_nr))
389 /* setters: store val into the on-disk super block field through cpu_to_le32/cpu_to_le16; wrapped in do { } while (0) so each is a single statement */
390 #define PUT_SB_BLOCK_COUNT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
391 #define PUT_SB_FREE_BLOCKS(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
392 #define PUT_SB_ROOT_BLOCK(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
393 #define PUT_SB_TREE_HEIGHT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
394 #define PUT_SB_REISERFS_STATE(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_state = cpu_to_le16(val); } while (0)
395 #define PUT_SB_VERSION(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
396 #define PUT_SB_BMAP_NR(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
397
398 #endif /* _LINUX_REISER_FS_SB */
399
400
401
402