File: /usr/src/linux/include/linux/reiserfs_fs_sb.h

1     /* Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing
2      * and copyright details */
3     
4     #ifndef _LINUX_REISER_FS_SB
5     #define _LINUX_REISER_FS_SB
6     
7     #ifdef __KERNEL__
8     #include <linux/tqueue.h>
9     #endif
10     
11     //
12     // super block's field values (format version and directory hash codes)
13     //
14     /*#define REISERFS_VERSION 0 undistributed bitmap */
15     /*#define REISERFS_VERSION 1 distributed bitmap and resizer*/
16     #define REISERFS_VERSION_2 2 /* distributed bitmap, resizer, 64-bit, etc; old_format_only() compares SB_VERSION(s) against this */
17     #define UNSET_HASH 0 // no hash recorded: read_super will guess which hash
18                          // the names in directories were sorted with
19     #define TEA_HASH  1  // presumably the hash selected by -o hash=tea -- confirm
20     #define YURA_HASH 2  // presumably the hash selected by -o hash=rupasov -- confirm
21     #define R5_HASH   3  // presumably the hash selected by -o hash=r5 -- confirm
22     #define DEFAULT_HASH R5_HASH // NOTE(review): users are not visible in this header; presumably the hash used when none is recorded -- confirm
23     
24     /* this is the on disk super block (packed); the SB_* / PUT_SB_* macros in this header access some of its fields via le*_to_cpu / cpu_to_le* */
25     
26     struct reiserfs_super_block
27     {
28       __u32 s_block_count;			/* blocks count         */
29       __u32 s_free_blocks;                  /* free blocks count    */
30       __u32 s_root_block;           	/* root block number    */
31       __u32 s_journal_block;           	/* journal block number    */
32       __u32 s_journal_dev;           	/* journal device number  */
33     
34       /* Since journal size is currently a #define in a header file, if 
35       ** someone creates a disk with a 16MB journal and moves it to a 
36       ** system with 32MB journal default, they will overflow their journal 
37       ** when they mount the disk.  s_orig_journal_size, plus some checks
38       ** while mounting (inside journal_init) prevent that from happening
39       */
40     
41     				/* great comment Chris. Thanks.  -Hans */
42     
43       __u32 s_orig_journal_size; 		/* size of the journal on FS creation.  used to make sure they don't overflow it */
44       __u32 s_journal_trans_max ;           /* max number of blocks in a transaction.  */
45       __u32 s_journal_block_count ;         /* total size of the journal. can change over time  */
46       __u32 s_journal_max_batch ;           /* max number of blocks to batch into a trans */
47       __u32 s_journal_max_commit_age ;      /* in seconds, how old can an async commit be */
48       __u32 s_journal_max_trans_age ;       /* in seconds, how old can a transaction be */
49       __u16 s_blocksize;                   	/* block size           */
50       __u16 s_oid_maxsize;			/* max size of object id array, see get_objectid() commentary  */
51       __u16 s_oid_cursize;			/* current size of object id array */
52       __u16 s_state;                       	/* valid or error       */
53       char s_magic[12];                     /* reiserfs magic string indicates that file system is reiserfs */
54       __u32 s_hash_function_code;		/* indicates which hash function is being used to sort names in a directory */
55       __u16 s_tree_height;                  /* height of disk tree */
56       __u16 s_bmap_nr;                      /* amount of bitmap blocks needed to address each block of file system */
57       __u16 s_version;		/* I'd prefer it if this was a string,
58                                        something like "3.6.4", and maybe
59                                        16 bytes long mostly unused. We
60                                        don't need to save bytes in the
61                                        superblock. -Hans */
62       __u16 s_reserved;
63       __u32 s_inode_generation;		/* NOTE(review): undocumented here; presumably a generation number handed to inodes -- confirm */
64       char s_unused[124] ;			/* zero filled by mkreiserfs */
65     } __attribute__ ((__packed__));
66     
67     #define SB_SIZE (sizeof(struct reiserfs_super_block)) /* bytes in the packed struct: 204 if __u32/__u16 are 4/2 bytes wide */
68     
69     /* this is the super from 3.5.X, where X >= 10; same leading fields as struct reiserfs_super_block, but s_magic is 16 bytes and there is no hash code, version, inode generation or unused tail */
70     struct reiserfs_super_block_v1
71     {
72       __u32 s_block_count;			/* blocks count         */
73       __u32 s_free_blocks;                  /* free blocks count    */
74       __u32 s_root_block;           	/* root block number    */
75       __u32 s_journal_block;           	/* journal block number    */
76       __u32 s_journal_dev;           	/* journal device number  */
77       __u32 s_orig_journal_size; 		/* size of the journal on FS creation.  used to make sure they don't overflow it */
78       __u32 s_journal_trans_max ;           /* max number of blocks in a transaction.  */
79       __u32 s_journal_block_count ;         /* total size of the journal. can change over time  */
80       __u32 s_journal_max_batch ;           /* max number of blocks to batch into a trans */
81       __u32 s_journal_max_commit_age ;      /* in seconds, how old can an async commit be */
82       __u32 s_journal_max_trans_age ;       /* in seconds, how old can a transaction be */
83       __u16 s_blocksize;                   	/* block size           */
84       __u16 s_oid_maxsize;			/* max size of object id array, see get_objectid() commentary  */
85       __u16 s_oid_cursize;			/* current size of object id array */
86       __u16 s_state;                       	/* valid or error       */
87       char s_magic[16];                     /* reiserfs magic string indicates that file system is reiserfs */
88       __u16 s_tree_height;                  /* height of disk tree */
89       __u16 s_bmap_nr;                      /* amount of bitmap blocks needed to address each block of file system */
90       __u32 s_reserved;			/* with 4/2-byte __u32/__u16 this sits at the same offset as s_version + s_reserved in struct reiserfs_super_block */
91     } __attribute__ ((__packed__));
92     
93     #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) /* bytes in the packed v1 struct: 76 if __u32/__u16 are 4/2 bytes wide */
94     
95     /* LOGGING -- */
96     
97     /* These all interrelate for performance.  
98     **
99     ** If the journal block count is smaller than n transactions, you lose speed. 
100     ** I don't know what n is yet, I'm guessing 8-16.
101     **
102     ** typical transaction size depends on the application, how often fsync is
103     ** called, and how many metadata blocks you dirty in a 30 second period.  
104     ** The more small files (<16k) you use, the larger your transactions will
105     ** be.
106     ** 
107     ** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
108     ** to wrap, which slows things down.  If you need high speed meta data updates, the journal should be big enough
109     ** to prevent wrapping before dirty meta blocks get to disk.
110     **
111     ** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
112     ** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
113     **
114     ** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
115     **
116     */
117     
118     /* journal sizing constants -- don't mess with these for a while */
119     				/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
120     #define JOURNAL_BLOCK_SIZE  4096 /* BUG gotta get rid of this */
121     #define JOURNAL_MAX_CNODE   1500 /* max cnodes to allocate. */
122     #define JOURNAL_TRANS_MAX 1024   /* biggest possible single transaction (presumably counted in blocks -- confirm), don't change for now (8/3/99) */
123     #define JOURNAL_HASH_SIZE 8192   /* number of slots in j_hash_table[] and j_list_hash_table[] of struct reiserfs_journal */
124     #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating.  Must be >= 2.  Sizes j_list_bitmap[] */
125     #define JOURNAL_LIST_COUNT 64 /* number of entries in j_journal_list[] of struct reiserfs_journal */
126     
127     /* these are bh_state bit flag offset numbers (bit positions, not masks), for use in the buffer head */
128     /* bits 17 and 21 are not assigned in this header */
129     #define BH_JDirty       16      /* journal data needs to be written before buffer can be marked dirty */
130     #define BH_JDirty_wait 18	/* commit is done, buffer marked dirty */
131     #define BH_JNew 19		/* buffer allocated during this transaction, no need to write if freed during this trans too */
132     
133     /* ugly.  metadata blocks must be prepared before they can be logged.  
134     ** prepared means unlocked and cleaned.  If the block is prepared, but not
135     ** logged for some reason, any bits cleared while preparing it must be 
136     ** set again.
137     */
138     #define BH_JPrepared 20		/* block has been prepared for the log */
139     #define BH_JRestore_dirty 22    /* restore the dirty bit later */
140     
141     /* One of these for every block in every transaction
142     ** Each one is in two hash tables.  First, a hash of the current transaction, and after journal_end, a
143     ** hash of all the in memory transactions.
144     ** next and prev are used by the current transaction (journal_hash).
145     ** hnext and hprev are used by journal_list_hash.  If a block is in more than one transaction, the journal_list_hash
146     ** links it in multiple times.  This allows flush_journal_list to remove just the cnode belonging
147     ** to a given transaction.
148     */
149     struct reiserfs_journal_cnode {
150       struct buffer_head *bh ;		 /* real buffer head */
151       kdev_t dev ;				 /* dev of real buffer head */
152       unsigned long blocknr ;		 /* block number of real buffer head, == 0 when buffer on disk */		 
153       int state ;				 /* NOTE(review): the meaning of this value is not defined in this header */
154       struct reiserfs_journal_list *jlist ;  /* journal list this cnode lives in */
155       struct reiserfs_journal_cnode *next ;  /* next in transaction list */
156       struct reiserfs_journal_cnode *prev ;  /* prev in transaction list */
157       struct reiserfs_journal_cnode *hprev ; /* prev in hash list */
158       struct reiserfs_journal_cnode *hnext ; /* next in hash list */
159     };
160     /* one bitmap buffer plus list linkage; presumably chained on reiserfs_journal.j_bitmap_nodes -- confirm */
161     struct reiserfs_bitmap_node {
162       int id ;			/* NOTE(review): how ids are assigned is not shown in this header */
163       char *data ;			/* presumably the bitmap bytes themselves -- confirm */
164       struct list_head list ;	/* linkage into a list of bitmap nodes */
165     } ;
166     /* pairs a journal list with an array of bitmap node pointers; struct reiserfs_journal keeps JOURNAL_NUM_BITMAPS of these in j_list_bitmap[] */
167     struct reiserfs_list_bitmap {
168       struct reiserfs_journal_list *journal_list ;	/* journal list these bitmaps are associated with */
169       struct reiserfs_bitmap_node **bitmaps ;	/* array of bitmap node pointers (length not shown in this header) */
170     } ;
171     
172     /*
173     ** transaction handle which is passed around for all journal calls (e.g. journal_mark_dirty takes a pointer to one)
174     */
175     struct reiserfs_transaction_handle {
176     				/* ifdef it. -Hans */
177       char *t_caller ;              /* debugging use */
178       int t_blocks_logged ;         /* number of blocks this writer has logged */
179       int t_blocks_allocated ;      /* number of blocks this writer allocated */
180       unsigned long t_trans_id ;    /* sanity check, equals the current trans id */
181       struct super_block *t_super ; /* super for this FS when journal_begin was 
182                                        called. saves calls to reiserfs_get_super */
183     
184     } ;
185     
186     /*
187     ** one of these for each transaction.  The most important part here is the j_realblock.
188     ** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
189     ** real buffer heads dirty once all the commits hit the disk,
190     ** and to make sure every real block in a transaction is on disk before allowing the log area
191     ** to be overwritten */
192     struct reiserfs_journal_list {
193       unsigned long j_start ;	/* presumably where this transaction starts in the log (cf. reiserfs_journal.j_start) -- confirm */
194       unsigned long j_len ;		/* presumably the length of this transaction -- confirm */
195       atomic_t j_nonzerolen ;
196       atomic_t j_commit_left ;
197       atomic_t j_flushing ;
198       atomic_t j_commit_flushing ;
199       atomic_t j_older_commits_done ;      /* all commits older than this are on disk */
200       unsigned long j_trans_id ;	/* presumably the id of the transaction held in this list -- confirm */
201       time_t j_timestamp ;
202       struct reiserfs_list_bitmap *j_list_bitmap ;
203       struct buffer_head *j_commit_bh ; /* commit buffer head */
204       struct reiserfs_journal_cnode *j_realblock  ; /* the list of cnodes described in the comment above this struct */
205       struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans.  free each of these on flush */
206       wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */
207       wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */
208     } ;
209     
210     struct reiserfs_page_list  ; /* defined in reiserfs_fs.h; only used through a pointer here */
211     
212     struct reiserfs_journal {
213       struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */
214       struct reiserfs_journal_cnode *j_last ; /* newest journal block */
215       struct reiserfs_journal_cnode *j_first ; /*  oldest journal block.  start here for traverse */
216     				
217       int j_state ;			
218       unsigned long j_trans_id ;
219       unsigned long j_mount_id ;
220       unsigned long j_start ;             /* start of current waiting commit (index into j_ap_blocks) */
221       unsigned long j_len ;               /* length of current waiting commit */
222       unsigned long j_len_alloc ;         /* number of buffers requested by journal_begin() */
223       atomic_t j_wcount ;            /* count of writers for current commit */
224       unsigned long j_bcount ;            /* batch count. allows turning X transactions into 1 */
225       unsigned long j_first_unflushed_offset ;  /* first unflushed transactions offset */
226       unsigned long j_last_flush_trans_id ;    /* last fully flushed journal timestamp */
227       struct buffer_head *j_header_bh ;   
228     
229       /* j_flush_pages must be flushed before the current transaction can
230       ** commit
231       */
232       struct reiserfs_page_list *j_flush_pages ;
233       time_t j_trans_start_time ;         /* time this transaction started */
234       wait_queue_head_t j_wait ;         /* wait for journal_end to finish I/O */
235       atomic_t j_wlock ;                       /* lock for j_wait */
236       wait_queue_head_t j_join_wait ;    /* wait for current transaction to finish before starting new one */
237       atomic_t j_jlock ;                       /* lock for j_join_wait */
238       int j_journal_list_index ;	      /* journal list number of the current trans (index into j_journal_list[]) */
239       int j_list_bitmap_index ;	      /* number of next list bitmap to use */
240       int j_must_wait ;		       /* no more journal begins allowed. MUST sleep on j_join_wait */
241       int j_next_full_flush ;             /* next journal_end will flush all journal list */
242       int j_next_async_flush ;             /* next journal_end will flush all async commits */
243     
244       int j_cnode_used ;	      /* number of cnodes on the used list */
245       int j_cnode_free ;          /* number of cnodes on the free list */
246     
247       struct reiserfs_journal_cnode *j_cnode_free_list ;
248       struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */
249     
250       int j_free_bitmap_nodes ;
251       int j_used_bitmap_nodes ;
252       struct list_head j_bitmap_nodes ;
253       struct inode j_dummy_inode ;
254       struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ;	/* array of bitmaps to record the deleted blocks */
255       struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ;	    /* array of all the journal lists */
256       struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; 	    /* hash table for real buffer heads in current trans */ 
257       struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all 
258       										the transactions */
259       struct list_head j_prealloc_list;     /* list of inodes which have preallocated blocks */
260     };
261     
262     #define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick.  magic string to find desc blocks in the journal */
263     
264     /* type of reiserfs_sb_info.s_hash_function, the function used to sort names in a directory; arguments are presumably the name and its length -- confirm */
265     typedef __u32 (*hashf_t) (const char *, int);
266     
267     /* reiserfs part of the in-core super block data; the macros in this header reach it as (s)->u.reiserfs_sb */
268     struct reiserfs_sb_info
269     {
270         struct buffer_head * s_sbh;                   /* Buffer containing the super block */
271     				/* both the comment and the choice of
272                                        name are unclear for s_rs -Hans */
273         struct reiserfs_super_block * s_rs;           /* Pointer to the on-disk super block inside the buffer (see SB_DISK_SUPER_BLOCK) */
274         struct buffer_head ** s_ap_bitmap;       /* array of buffers, holding block bitmap */
275         struct reiserfs_journal *s_journal ;		/* pointer to journal information */
276         unsigned short s_mount_state;                 /* reiserfs state (valid, invalid) */
277       
278     				/* Comment? -Hans.  NOTE(review): still undocumented; presumably a buffer I/O completion callback -- confirm */
279         void (*end_io_handler)(struct buffer_head *, int);
280         hashf_t s_hash_function;	/* pointer to function which is used
281                                        to sort names in directory. Set on
282                                        mount */
283         unsigned long s_mount_opt;	/* reiserfs's mount options are set
284                                        here as bits (1 << NOTAIL, 1 << REISERFS_NOLOG,
285                                        1 << REPLAYONLY, ...) */
286     
287     				/* Comment? -Hans.  NOTE(review): what sleeps on this queue is not shown in this header */
288         wait_queue_head_t s_wait;
289     				/* To be obsoleted soon by per buffer seals.. -Hans */
290         atomic_t s_generation_counter; // increased by one every time the
291         // tree gets re-balanced
292         
293         /* session statistics (counters; where they are updated is not shown in this header) */
294         int s_kmallocs;
295         int s_disk_reads;
296         int s_disk_writes;
297         int s_fix_nodes;
298         int s_do_balance;
299         int s_unneeded_left_neighbor;
300         int s_good_search_by_key_reada;
301         int s_bmaps;
302         int s_bmaps_without_search;
303         int s_direct2indirect;
304         int s_indirect2direct;
305     };
306     
307     /* mount options: each value is a bit number, tested as (1 << X) against s_mount_opt; bits 1 and 2 are not assigned in this header */
308     #define NOTAIL 0  /* -o notail: no tails will be created in a session */
309     #define REPLAYONLY 3 /* replay journal and return 0. Used by fsck */
310     #define REISERFS_NOLOG 4      /* -o nolog: turn journalling off */
311     #define REISERFS_CONVERT 5    /* -o conv: causes conversion of old
312                                      format super block to the new
313                                      format. If not specified - old
314                                      partition will be dealt with in a
315                                      manner of 3.5.x */
316     
317     /* -o hash={tea, rupasov, r5, detect} is meant for properly mounting 
318     ** reiserfs disks from 3.5.19 or earlier.  99% of the time, this option
319     ** is not required.  If the normal autodetection code can't determine which
320     ** hash to use (because both hashes had the same value for a file)
321     ** use this option to force a specific hash.  It won't allow you to override
322     ** the existing hash on the FS, so if you have a tea hash disk, and mount
323     ** with -o hash=rupasov, the mount will fail.
324     */
325     #define FORCE_TEA_HASH 6      /* try to force tea hash on mount */
326     #define FORCE_RUPASOV_HASH 7  /* try to force rupasov hash on mount */
327     #define FORCE_R5_HASH 8       /* try to force r5 hash on mount */
328     #define FORCE_HASH_DETECT 9   /* try to detect hash function on mount */
329     
330     
331     /* used for testing experimental features, makes benchmarking new
332        features with and without more convenient, should never be used by
333        users in any code shipped to users (ideally).  Bit numbers in s_mount_opt. */
334     
335     #define REISERFS_NO_BORDER 11
336     #define REISERFS_NO_UNHASHED_RELOCATION 12
337     #define REISERFS_HASHED_RELOCATION 13
338     #define REISERFS_TEST4 14 /* NOTE(review): defined again, identically, a few lines below; one copy is redundant */
339     
340     #define REISERFS_TEST1 11 /* same bit as REISERFS_NO_BORDER */
341     #define REISERFS_TEST2 12 /* same bit as REISERFS_NO_UNHASHED_RELOCATION */
342     #define REISERFS_TEST3 13 /* same bit as REISERFS_HASHED_RELOCATION */
343     #define REISERFS_TEST4 14 
344     /* mount option tests: each yields the masked bit of s_mount_opt (nonzero when set, not normalized to 1) */
345     #define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH))
346     #define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH))
347     #define reiserfs_tea_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_TEA_HASH))
348     #define reiserfs_hash_detect(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_HASH_DETECT))
349     #define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER))
350     #define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
351     #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
352     #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4))
353     
354     #define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL))
355     #define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REPLAYONLY))
356     #define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG))
357     #define old_format_only(s) ((SB_VERSION(s) != REISERFS_VERSION_2) && !((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_CONVERT))) /* true (1) when the on-disk version is not REISERFS_VERSION_2 and -o conv was not given */
358     
359     /* prototypes only; the definitions and their exact contracts are not in this header */
360     void reiserfs_file_buffer (struct buffer_head * bh, int list);
361     int reiserfs_is_super(struct super_block *s)  ;
362     int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ;
363     int flush_old_commits(struct super_block *s, int) ;
364     int show_reiserfs_locks(void) ;
365     int reiserfs_resize(struct super_block *, unsigned long) ; /* NOTE(review): second argument is presumably the new block count -- confirm */
366     /* NOTE(review): users are not visible in this header; presumably result codes telling a caller whether schedule() ran meanwhile -- confirm */
367     #define CARRY_ON                0
368     #define SCHEDULE_OCCURRED       1
369     
370     /* shortcuts into the reiserfs part of the super block; s is used as a pointer whose u.reiserfs_sb member is a struct reiserfs_sb_info */
371     #define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh)
372     #define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal)
373     #define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list)
374     #define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) 
375     #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) /* NOTE(review): struct reiserfs_journal in this header declares no j_journal_len_free member, so any use of this macro cannot compile -- fix or remove */
376     #define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap)
377     
378     
379     // on-disk super block fields converted to cpu form (read side; stored little endian)
380     #define SB_DISK_SUPER_BLOCK(s) ((s)->u.reiserfs_sb.s_rs) /* raw pointer to the on-disk struct, no conversion */
381     #define SB_BLOCK_COUNT(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_block_count))
382     #define SB_FREE_BLOCKS(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_free_blocks))
383     #define SB_REISERFS_MAGIC(s) (SB_DISK_SUPER_BLOCK(s)->s_magic) /* char array, no byte-order conversion */
384     #define SB_ROOT_BLOCK(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_root_block))
385     #define SB_TREE_HEIGHT(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_tree_height))
386     #define SB_REISERFS_STATE(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_state))
387     #define SB_VERSION(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_version))
388     #define SB_BMAP_NR(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_bmap_nr))
389     // write side: store a cpu-order value into an on-disk super block field, converting with cpu_to_le32 / cpu_to_le16
390     #define PUT_SB_BLOCK_COUNT(s, val)    do { SB_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
391     #define PUT_SB_FREE_BLOCKS(s, val)    do { SB_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
392     #define PUT_SB_ROOT_BLOCK(s, val)     do { SB_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
393     #define PUT_SB_TREE_HEIGHT(s, val)    do { SB_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
394     #define PUT_SB_REISERFS_STATE(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_state = cpu_to_le16(val); } while (0) 
395     #define PUT_SB_VERSION(s, val)        do { SB_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
396     #define PUT_SB_BMAP_NR(s, val)           do { SB_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
397     
398     #endif	/* _LINUX_REISER_FS_SB */
399     
400     
401     
402