/*
 *  ext4.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

16
17
#ifndef _EXT4_H
#define _EXT4_H
18
19

#include <linux/types.h>
20
#include <linux/blkdev.h>
21
#include <linux/magic.h>
22
#include <linux/jbd2.h>
23
#include <linux/quota.h>
24
25
26
27
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>
28
29
30
31
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
32
#include <crypto/hash.h>
33
34
35
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
36

37
/*
 * The fourth extended filesystem constants/structures
 */

/*
 * Define EXT4FS_DEBUG to produce debug messages
 */
#undef EXT4FS_DEBUG

/*
 * Debug code
 *
 * With EXT4FS_DEBUG defined, ext4_debug() issues two KERN_DEBUG printk()
 * calls: first a "(file, line): function:" prefix, then the caller's own
 * format string and arguments.  Without it, ext4_debug() expands to
 * no_printk() with the same arguments.
 */
#ifdef EXT4FS_DEBUG
#define ext4_debug(f, a...)						\
	do {								\
		printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",	\
			__FILE__, __LINE__, __func__);			\
		printk(KERN_DEBUG f, ## a);				\
	} while (0)
#else
#define ext4_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

60
61
62
63
64
65
66
67
68
69
/*
 * Turn on EXT_DEBUG to get lots of info about extents operations.
 *
 * The line below defines EXT_DEBUG__ (note the trailing underscores), not
 * EXT_DEBUG, so unless EXT_DEBUG is defined somewhere else the #ifdef
 * selects the no_printk() variant of ext_debug().
 */
#define EXT_DEBUG__
#ifdef EXT_DEBUG
#define ext_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
#else
#define ext_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

70
/*
 * Error-reporting wrappers.  Each one forwards to ext4_error_inode() or
 * ext4_error_file() and fills in the calling function name and line number.
 * EXT4_ERROR_INODE() passes 0 as the block argument; the other two pass the
 * caller-supplied block.
 */
#define EXT4_ERROR_INODE(inode, fmt, a...) \
	ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)

#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...)			\
	ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)

#define EXT4_ERROR_FILE(file, block, fmt, a...)				\
	ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
78

79
80
81
82
83
84
85
86
87
88
89
90
/* data type for block offset of block group (signed) */
typedef int ext4_grpblk_t;

/* data type for filesystem-wide blocks number (unsigned long long) */
typedef unsigned long long ext4_fsblk_t;

/* data type for file logical block number (fixed at 32 bits) */
typedef __u32 ext4_lblk_t;

/* data type for block group number (unsigned) */
typedef unsigned int ext4_group_t;

91
/*
 * Flags used in mballoc's allocation_context flags field.
 *
 * Also used to show what's going on for debugging purposes when the
 * flag field is exported via the traceport interface
 *
 * Each flag is a distinct single bit, so they can be OR-ed together.
 */

/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE		0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED		0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA		0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST		0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST		0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA		0x0020
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC		0x0040
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC	0x0080
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY		0x0100
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL		0x0200
/* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED	0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC		0x0800
/* Use reserved root blocks if needed */
#define EXT4_MB_USE_ROOT_BLOCKS		0x1000
124
125
126
127

struct ext4_allocation_request {
	/* target inode for block we're allocating */
	struct inode *inode;
128
129
	/* how many blocks we want to allocate */
	unsigned int len;
130
131
132
133
134
135
	/* logical block in target inode */
	ext4_lblk_t logical;
	/* the closest logical allocated block to the left */
	ext4_lblk_t lleft;
	/* the closest logical allocated block to the right */
	ext4_lblk_t lright;
136
137
138
139
140
	/* phys. target (a hint) */
	ext4_fsblk_t goal;
	/* phys. block for the closest logical allocated block to the left */
	ext4_fsblk_t pleft;
	/* phys. block for the closest logical allocated block to the right */
141
142
	ext4_fsblk_t pright;
	/* flags. see above EXT4_MB_HINT_* */
143
	unsigned int flags;
144
145
};

146
147
148
149
150
151
152
153
154
155
156
157
/*
 * Logical to physical block mapping, used by ext4_map_blocks()
 *
 * This structure is used to pass requests into ext4_map_blocks() as
 * well as to store the information returned by ext4_map_blocks().  It
 * takes less room on the stack than a struct buffer_head.
 *
 * The EXT4_MAP_* flag bits are defined in terms of the corresponding
 * BH_* bit numbers.
 */
#define EXT4_MAP_NEW		(1 << BH_New)
#define EXT4_MAP_MAPPED		(1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
 * ext4_map_blocks wants to know whether or not the underlying cluster has
 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
 * the requested mapping was from previously mapped (or delayed allocated)
 * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
 * should never appear on buffer_head's state flags.
 */
#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
/* Union of every EXT4_MAP_* bit defined above. */
#define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
169
170
171
172
173
174
175
176

/* Request/result record for a logical-to-physical block mapping. */
struct ext4_map_blocks {
	ext4_fsblk_t m_pblk;	/* filesystem-wide (physical) block number */
	ext4_lblk_t m_lblk;	/* file logical block number */
	unsigned int m_len;	/* presumably length of the mapping in blocks; confirm against ext4_map_blocks() */
	unsigned int m_flags;	/* presumably EXT4_MAP_* bits; confirm against ext4_map_blocks() */
};

177
178
179
180
181
182
183
184
185
186
187
188
189
190
/*
 * For delayed allocation tracking
 *
 * Bundles an inode, the extent being accumulated (b_blocknr, b_size,
 * b_state), a page range and the writeback control for one pass.
 * NOTE(review): the producers and consumers of this structure are not
 * visible in this header; the uncommented fields below are documented
 * from their names only.
 */
struct mpage_da_data {
	struct inode *inode;
	sector_t b_blocknr;		/* start block number of extent */
	size_t b_size;			/* size of extent */
	unsigned long b_state;		/* state of the extent */
	unsigned long first_page, next_page;	/* extent of pages */
	struct writeback_control *wbc;
	int io_done;			/* presumably set once I/O was submitted; confirm */
	int pages_written;		/* presumably a count of pages written; confirm */
	int retval;			/* presumably the result code of the pass; confirm */
};
191
192
193
194
195
196

/*
 * Flags for ext4_io_end->flag
 */
#define	EXT4_IO_END_UNWRITTEN	0x0001
#define EXT4_IO_END_ERROR	0x0002
#define EXT4_IO_END_QUEUED	0x0004
#define EXT4_IO_END_DIRECT	0x0008
199
200
201

struct ext4_io_page {
	struct page	*p_page;
202
	atomic_t	p_count;
203
204
205
206
};

#define MAX_IO_PAGES 128

207
208
209
210
211
212
213
/*
 * For converting uninitialized extents on a work queue.
 *
 * 'page' is only used from the writepage() path; 'pages' is only used for
 * buffered writes; they are used to keep page references until conversion
 * takes place.  For AIO/DIO, neither field is filled in.
 */
214
typedef struct ext4_io_end {
215
	struct list_head	list;		/* per-file finished IO list */
216
	struct inode		*inode;		/* file being written to */
217
	unsigned int		flag;		/* unwritten or not */
218
	struct page		*page;		/* for writepage() path */
219
220
	loff_t			offset;		/* offset in the file */
	ssize_t			size;		/* size of the extent */
221
	struct work_struct	work;		/* data work queue */
222
223
	struct kiocb		*iocb;		/* iocb struct for AIO */
	int			result;		/* error value for AIO */
224
225
	int			num_io_pages;   /* for writepages() */
	struct ext4_io_page	*pages[MAX_IO_PAGES]; /* for writepages() */
226
227
} ext4_io_end_t;

228
229
230
231
232
233
234
235
/*
 * State carried while submitting I/O: an operation code, the bio, the
 * associated ext4_io_end_t and ext4_io_page, and a sector number.
 * NOTE(review): the code that fills this in is not part of this header;
 * confirm the exact meaning of each field against its users.
 */
struct ext4_io_submit {
	int			io_op;
	struct bio		*io_bio;
	ext4_io_end_t		*io_end;
	struct ext4_io_page	*io_page;
	sector_t		io_next_block;
};

236
237
238
/*
 * Special inodes numbers
 */
#define	EXT4_BAD_INO		 1	/* Bad blocks inode */
#define EXT4_ROOT_INO		 2	/* Root inode */
#define EXT4_USR_QUOTA_INO	 3	/* User quota inode */
#define EXT4_GRP_QUOTA_INO	 4	/* Group quota inode */
#define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
#define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
#define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
#define EXT4_JOURNAL_INO	 8	/* Journal inode */

/* First non-reserved inode for old ext4 filesystems */
#define EXT4_GOOD_OLD_FIRST_INO	11

/*
 * Maximal count of links to a file
 */
#define EXT4_LINK_MAX		65000
255
256
257
258

/*
 * Macro-instructions used to manage several block sizes
 *
 * Under __KERNEL__ the argument 's' is used as a super_block-like object
 * (s_blocksize, s_blocksize_bits, EXT4_SB(s)); otherwise it is used as an
 * on-disk superblock (s_log_block_size, s_rev_level, ...).
 */
#define EXT4_MIN_BLOCK_SIZE		1024
#define	EXT4_MAX_BLOCK_SIZE		65536
#define EXT4_MIN_BLOCK_LOG_SIZE		10
#define EXT4_MAX_BLOCK_LOG_SIZE		16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s)		((s)->s_blocksize)
#else
# define EXT4_BLOCK_SIZE(s)		(EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
#define	EXT4_ADDR_PER_BLOCK(s)		(EXT4_BLOCK_SIZE(s) / sizeof(__u32))
#define EXT4_CLUSTER_SIZE(s)		(EXT4_BLOCK_SIZE(s) << \
					 EXT4_SB(s)->s_cluster_bits)
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
# define EXT4_CLUSTER_BITS(s)		(EXT4_SB(s)->s_cluster_bits)
#else
# define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
#endif
#ifdef __KERNEL__
#define	EXT4_ADDR_PER_BLOCK_BITS(s)	(EXT4_SB(s)->s_addr_per_block_bits)
#define EXT4_INODE_SIZE(s)		(EXT4_SB(s)->s_inode_size)
#define EXT4_FIRST_INO(s)		(EXT4_SB(s)->s_first_ino)
#else
#define EXT4_INODE_SIZE(s)	(((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
				 EXT4_GOOD_OLD_INODE_SIZE : \
				 (s)->s_inode_size)
#define EXT4_FIRST_INO(s)	(((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
				 EXT4_GOOD_OLD_FIRST_INO : \
				 (s)->s_first_ino)
#endif
/* Round 'size' up to a multiple of (1 << blkbits) via ALIGN(). */
#define EXT4_BLOCK_ALIGN(size, blkbits)		ALIGN((size), (1 << (blkbits)))
290

291
292
293
294
295
296
297
298
/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
/* Translate a cluster number to a block number */
#define EXT4_C2B(sbi, cluster)	((cluster) << (sbi)->s_cluster_bits)
/*
 * Translate # of blks to # of clusters.  Adds (s_cluster_ratio - 1) before
 * shifting, i.e. rounds up -- assuming s_cluster_ratio equals
 * 1 << s_cluster_bits (TODO confirm where the superblock info is set up).
 */
#define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
				 (sbi)->s_cluster_bits)

299
300
301
/*
 * Structure of a blocks group descriptor
 *
 * On-disk layout: the field order and widths below must not change.
 * Wider values are split into a "_lo" half in the first 32 bytes and a
 * "_hi" half in the second 32 bytes.
 */
struct ext4_group_desc
{
	__le32	bg_block_bitmap_lo;	/* Blocks bitmap block */
	__le32	bg_inode_bitmap_lo;	/* Inodes bitmap block */
	__le32	bg_inode_table_lo;	/* Inodes table block */
	__le16	bg_free_blocks_count_lo;/* Free blocks count */
	__le16	bg_free_inodes_count_lo;/* Free inodes count */
	__le16	bg_used_dirs_count_lo;	/* Directories count */
	__le16	bg_flags;		/* EXT4_BG_flags (INODE_UNINIT, etc) */
	__le32  bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
	__le16  bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
	__le16  bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
	__le16  bg_itable_unused_lo;	/* Unused inodes count */
	__le16  bg_checksum;		/* crc16(sb_uuid+group+desc) */
	__le32	bg_block_bitmap_hi;	/* Blocks bitmap block MSB */
	__le32	bg_inode_bitmap_hi;	/* Inodes bitmap block MSB */
	__le32	bg_inode_table_hi;	/* Inodes table block MSB */
	__le16	bg_free_blocks_count_hi;/* Free blocks count MSB */
	__le16	bg_free_inodes_count_hi;/* Free inodes count MSB */
	__le16	bg_used_dirs_count_hi;	/* Directories count MSB */
	__le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
	__le32  bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
	__le16  bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
	__le16  bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
	__u32   bg_reserved;
};

329
330
331
332
333
334
335
/*
 * Byte offset, within struct ext4_group_desc, of the first byte past the
 * bg_inode_bitmap_csum_hi / bg_block_bitmap_csum_hi field respectively
 * (field offset plus the size of the __le16 field).
 */
#define EXT4_BG_INODE_BITMAP_CSUM_HI_END	\
	(offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \
	 sizeof(__le16))
#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END	\
	(offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \
	 sizeof(__le16))

336
337
338
339
340
/*
 * Structure of a flex block group info
 */

struct flex_groups {
341
	atomic_t free_inodes;
342
	atomic_t free_clusters;
343
	atomic_t used_dirs;
344
345
};

346
347
348
349
/* Bits for the bg_flags field of struct ext4_group_desc. */
#define EXT4_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not in use */
#define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
#define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */

350
351
352
/*
 * Macro-instructions used to manage group descriptors
 *
 * As with the block-size macros, the __KERNEL__ variants read cached
 * values through EXT4_SB(s) while the non-kernel variants read fields of
 * 's' directly.
 */
#define EXT4_MIN_DESC_SIZE		32
#define EXT4_MIN_DESC_SIZE_64BIT	64
#define	EXT4_MAX_DESC_SIZE		EXT4_MIN_BLOCK_SIZE
#define EXT4_DESC_SIZE(s)		(EXT4_SB(s)->s_desc_size)
#ifdef __KERNEL__
# define EXT4_BLOCKS_PER_GROUP(s)	(EXT4_SB(s)->s_blocks_per_group)
# define EXT4_CLUSTERS_PER_GROUP(s)	(EXT4_SB(s)->s_clusters_per_group)
# define EXT4_DESC_PER_BLOCK(s)		(EXT4_SB(s)->s_desc_per_block)
# define EXT4_INODES_PER_GROUP(s)	(EXT4_SB(s)->s_inodes_per_group)
# define EXT4_DESC_PER_BLOCK_BITS(s)	(EXT4_SB(s)->s_desc_per_block_bits)
#else
# define EXT4_BLOCKS_PER_GROUP(s)	((s)->s_blocks_per_group)
# define EXT4_DESC_PER_BLOCK(s)		(EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
# define EXT4_INODES_PER_GROUP(s)	((s)->s_inodes_per_group)
#endif

/*
 * Constants relative to the data blocks
 *
 * Slots 0..EXT4_NDIR_BLOCKS-1 of the i_block array come first, followed by
 * one slot each at the IND, DIND and TIND indexes; EXT4_N_BLOCKS is the
 * total number of slots.
 */
#define	EXT4_NDIR_BLOCKS		12
#define	EXT4_IND_BLOCK			EXT4_NDIR_BLOCKS
#define	EXT4_DIND_BLOCK			(EXT4_IND_BLOCK + 1)
#define	EXT4_TIND_BLOCK			(EXT4_DIND_BLOCK + 1)
#define	EXT4_N_BLOCKS			(EXT4_TIND_BLOCK + 1)
377
378
379
380

/*
 * Inode flags
 *
 * Bit masks; each one corresponds to an EXT4_INODE_* bit number.
 */
#define	EXT4_SECRM_FL			0x00000001 /* Secure deletion */
#define	EXT4_UNRM_FL			0x00000002 /* Undelete */
#define	EXT4_COMPR_FL			0x00000004 /* Compress file */
#define EXT4_SYNC_FL			0x00000008 /* Synchronous updates */
#define EXT4_IMMUTABLE_FL		0x00000010 /* Immutable file */
#define EXT4_APPEND_FL			0x00000020 /* writes to file may only append */
#define EXT4_NODUMP_FL			0x00000040 /* do not dump file */
#define EXT4_NOATIME_FL			0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define EXT4_DIRTY_FL			0x00000100
#define EXT4_COMPRBLK_FL		0x00000200 /* One or more compressed clusters */
#define EXT4_NOCOMPR_FL			0x00000400 /* Don't compress */
#define EXT4_ECOMPR_FL			0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
#define EXT4_INDEX_FL			0x00001000 /* hash-indexed directory */
#define EXT4_IMAGIC_FL			0x00002000 /* AFS directory */
#define EXT4_JOURNAL_DATA_FL		0x00004000 /* file data should be journaled */
#define EXT4_NOTAIL_FL			0x00008000 /* file tail should not be merged */
#define EXT4_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
#define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
#define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL		0x10000000 /* Inode has inline data. */
#define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */

/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
			   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
			   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))

/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)

/*
 * Strip the flags that make no sense for an inode of type @mode.
 * Directories keep every flag, regular files keep EXT4_REG_FLMASK and
 * every other inode type keeps only EXT4_OTHER_FLMASK.
 */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
	if (S_ISDIR(mode))
		return flags;

	return flags & (S_ISREG(mode) ? EXT4_REG_FLMASK : EXT4_OTHER_FLMASK);
}

434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
/*
 * Inode flags used for atomic set/get
 *
 * These are bit numbers; EXT4_INODE_xxx is the position of the single bit
 * set in the matching EXT4_xxx_FL mask.
 */
enum {
	EXT4_INODE_SECRM	= 0,	/* Secure deletion */
	EXT4_INODE_UNRM		= 1,	/* Undelete */
	EXT4_INODE_COMPR	= 2,	/* Compress file */
	EXT4_INODE_SYNC		= 3,	/* Synchronous updates */
	EXT4_INODE_IMMUTABLE	= 4,	/* Immutable file */
	EXT4_INODE_APPEND	= 5,	/* writes to file may only append */
	EXT4_INODE_NODUMP	= 6,	/* do not dump file */
	EXT4_INODE_NOATIME	= 7,	/* do not update atime */
/* Reserved for compression usage... */
	EXT4_INODE_DIRTY	= 8,
	EXT4_INODE_COMPRBLK	= 9,	/* One or more compressed clusters */
	EXT4_INODE_NOCOMPR	= 10,	/* Don't compress */
	EXT4_INODE_ECOMPR	= 11,	/* Compression error */
/* End compression flags --- maybe not all used */
	EXT4_INODE_INDEX	= 12,	/* hash-indexed directory */
	EXT4_INODE_IMAGIC	= 13,	/* AFS directory */
	EXT4_INODE_JOURNAL_DATA	= 14,	/* file data should be journaled */
	EXT4_INODE_NOTAIL	= 15,	/* file tail should not be merged */
	EXT4_INODE_DIRSYNC	= 16,	/* dirsync behaviour (directories only) */
	EXT4_INODE_TOPDIR	= 17,	/* Top of directory hierarchies*/
	EXT4_INODE_HUGE_FILE	= 18,	/* Set to each huge file */
	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
	EXT4_INODE_INLINE_DATA	= 28,	/* Data in inode. */
	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
};

/*
 * TEST_FLAG_VALUE(X) is true when EXT4_X_FL is exactly the single bit
 * numbered EXT4_INODE_X.  The shift is done on an unsigned 1 so that bit 31
 * (EXT4_INODE_RESERVED) does not shift into the sign bit of an int.
 */
#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
/*
 * CHECK_FLAG_VALUE(X) logs the mismatching pair and calls BUG_ON(1) when
 * TEST_FLAG_VALUE(X) is false.  It is wrapped in do { } while (0) so it
 * behaves as a single statement, and the mask is cast to int so the
 * argument type matches the "%d" conversion.
 */
#define CHECK_FLAG_VALUE(FLAG)						\
	do {								\
		if (!TEST_FLAG_VALUE(FLAG)) {				\
			printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
				(int)EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); \
			BUG_ON(1);					\
		}							\
	} while (0)

/*
 * Since it's pretty easy to mix up bit numbers and hex values, and we
 * can't do a compile-time test for ENUM values, we use a run-time
 * test to make sure that EXT4_XXX_FL is consistent with respect to
 * EXT4_INODE_XXX.  If all is well the printk and BUG_ON will all drop
 * out so it won't cost any extra space in the compiled kernel image.
 * But it's important that these values are the same, since we are
 * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
 * must be consistent with the values of FS_XXX_FL defined in
 * include/linux/fs.h and the on-disk values found in ext2, ext3, and
 * ext4 filesystems, and of course the values defined in e2fsprogs.
 *
 * It's not paranoia if the Murphy's Law really *is* out to get you.  :-)
 */
static inline void ext4_check_flag_values(void)
{
	CHECK_FLAG_VALUE(SECRM);
	CHECK_FLAG_VALUE(UNRM);
	CHECK_FLAG_VALUE(COMPR);
	CHECK_FLAG_VALUE(SYNC);
	CHECK_FLAG_VALUE(IMMUTABLE);
	CHECK_FLAG_VALUE(APPEND);
	CHECK_FLAG_VALUE(NODUMP);
	CHECK_FLAG_VALUE(NOATIME);
	CHECK_FLAG_VALUE(DIRTY);
	CHECK_FLAG_VALUE(COMPRBLK);
	CHECK_FLAG_VALUE(NOCOMPR);
	CHECK_FLAG_VALUE(ECOMPR);
	CHECK_FLAG_VALUE(INDEX);
	CHECK_FLAG_VALUE(IMAGIC);
	CHECK_FLAG_VALUE(JOURNAL_DATA);
	CHECK_FLAG_VALUE(NOTAIL);
	CHECK_FLAG_VALUE(DIRSYNC);
	CHECK_FLAG_VALUE(TOPDIR);
	CHECK_FLAG_VALUE(HUGE_FILE);
	CHECK_FLAG_VALUE(EXTENTS);
	CHECK_FLAG_VALUE(EA_INODE);
	CHECK_FLAG_VALUE(EOFBLOCKS);
509
	CHECK_FLAG_VALUE(INLINE_DATA);
510
511
512
	CHECK_FLAG_VALUE(RESERVED);
}

513
/*
 * Used to pass group descriptor data when online resize is done.
 * This is the argument type of EXT4_IOC_GROUP_ADD, so the field order and
 * widths must not change.
 */
struct ext4_new_group_input {
	__u32 group;		/* Group number for this data */
	__u64 block_bitmap;	/* Absolute block number of block bitmap */
	__u64 inode_bitmap;	/* Absolute block number of inode bitmap */
	__u64 inode_table;	/* Absolute block number of inode table start */
	__u32 blocks_count;	/* Total number of blocks in this group */
	__u16 reserved_blocks;	/* Number of reserved blocks in this group */
	__u16 unused;
};

524
525
526
527
528
529
530
531
532
533
534
535
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
 * Field-for-field counterpart of struct ext4_new_group_input that uses
 * compat_u64 for the 64-bit members; it is the argument type of
 * EXT4_IOC32_GROUP_ADD.  Only built for kernels with CONFIG_COMPAT.
 */
struct compat_ext4_new_group_input {
	u32 group;
	compat_u64 block_bitmap;
	compat_u64 inode_bitmap;
	compat_u64 inode_table;
	u32 blocks_count;
	u16 reserved_blocks;
	u16 unused;
};
#endif

536
537
/*
 * The struct ext4_new_group_input in kernel space, with free_blocks_count.
 * The leading fields mirror struct ext4_new_group_input one for one.
 */
struct ext4_new_group_data {
	__u32 group;
	__u64 block_bitmap;
	__u64 inode_bitmap;
	__u64 inode_table;
	__u32 blocks_count;
	__u16 reserved_blocks;
	__u16 unused;
	__u32 free_blocks_count;
};

548
549
550
551
552
553
554
555
/*
 * Indexes used to index group tables in ext4_new_group_data.
 * GROUP_TABLE_COUNT is the number of index values, not an index itself.
 */
enum {
	BLOCK_BITMAP = 0,	/* block bitmap */
	INODE_BITMAP = 1,	/* inode bitmap */
	INODE_TABLE = 2,	/* inode tables */
	GROUP_TABLE_COUNT = 3,
};

Amit Arora's avatar
Amit Arora committed
556
/*
 * Flags used by ext4_map_blocks()
 */
	/* Allocate any needed blocks and/or convert an uninitialized
	   extent to be an initialized extent */
#define EXT4_GET_BLOCKS_CREATE			0x0001
	/* Request the creation of an uninitialized extent */
#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
						 EXT4_GET_BLOCKS_CREATE)
	/* Caller is from the delayed allocation writeout path,
	   so set the magic i_delalloc_reserve_flag after taking the
	   inode allocation semaphore */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
	/* Caller is from the direct IO path: request creation of
	   uninitialized extents if not allocated, or split the
	   uninitialized extent if blocks have been preallocated already */
#define EXT4_GET_BLOCKS_PRE_IO			0x0008
#define EXT4_GET_BLOCKS_CONVERT			0x0010
#define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
	/* Convert extent to initialized after IO complete */
#define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
	/* Punch out blocks of an extent */
#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
	/* Don't normalize allocation size (used for fallocate) */
#define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
	/* Request will not result in inode size update (used for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
	/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK			0x0100
588

589
590
591
592
593
/*
 * Flags used by ext4_free_blocks
 */
#define EXT4_FREE_BLOCKS_METADATA	0x0001
#define EXT4_FREE_BLOCKS_FORGET		0x0002
#define EXT4_FREE_BLOCKS_VALIDATED	0x0004
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020

/*
 * Flags used by ext4_discard_partial_page_buffers
 */
#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
603

604
605
606
/*
 * ioctl commands
 *
 * All ext4-specific commands use ioctl type 'f'.
 */
#define	EXT4_IOC_GETFLAGS		FS_IOC_GETFLAGS
#define	EXT4_IOC_SETFLAGS		FS_IOC_SETFLAGS
#define	EXT4_IOC_GETVERSION		_IOR('f', 3, long)
#define	EXT4_IOC_SETVERSION		_IOW('f', 4, long)
#define	EXT4_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
#define	EXT4_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
#define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND		_IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD		_IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE		_IO('f', 9)
 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS		_IOW('f', 16, __u64)
623

624
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
 * ioctl commands in 32 bit emulation
 *
 * Same command numbers as the native definitions, with int / unsigned int /
 * struct compat_ext4_new_group_input as the argument types.
 */
#define EXT4_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
#define EXT4_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION		_IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION		_IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ		_IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
#define EXT4_IOC32_GROUP_ADD		_IOW('f', 8, struct compat_ext4_new_group_input)
#define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
#endif
639

640
/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS	0xFFFFFFFF

643
644
645
/*
 * Structure of an inode on the disk
 */
646
struct ext4_inode {
647
648
	__le16	i_mode;		/* File mode */
	__le16	i_uid;		/* Low 16 bits of Owner Uid */
649
	__le32	i_size_lo;	/* Size in bytes */
650
	__le32	i_atime;	/* Access time */
Kalpak Shah's avatar
Kalpak Shah committed
651
	__le32	i_ctime;	/* Inode Change time */
652
653
654
655
	__le32	i_mtime;	/* Modification time */
	__le32	i_dtime;	/* Deletion Time */
	__le16	i_gid;		/* Low 16 bits of Group Id */
	__le16	i_links_count;	/* Links count */
656
	__le32	i_blocks_lo;	/* Blocks count */
657
658
659
	__le32	i_flags;	/* File flags */
	union {
		struct {
660
			__le32  l_i_version;
661
662
663
664
665
666
667
668
		} linux1;
		struct {
			__u32  h_i_translator;
		} hurd1;
		struct {
			__u32  m_i_reserved1;
		} masix1;
	} osd1;				/* OS dependent 1 */
669
	__le32	i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
670
	__le32	i_generation;	/* File version (for NFS) */
671
	__le32	i_file_acl_lo;	/* File ACL */
672
	__le32	i_size_high;
673
	__le32	i_obso_faddr;	/* Obsoleted fragment address */
674
675
	union {
		struct {
676
			__le16	l_i_blocks_high; /* were l_i_reserved1 */
677
			__le16	l_i_file_acl_high;
Dave Kleikamp's avatar
Dave Kleikamp committed
678
			__le16	l_i_uid_high;	/* these 2 fields */
679
			__le16	l_i_gid_high;	/* were reserved2[0] */
680
681
			__le16	l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
			__le16	l_i_reserved;
682
683
		} linux2;
		struct {
684
			__le16	h_i_reserved1;	/* Obsoleted fragment number/size which are removed in ext4 */
685
686
687
688
689
690
			__u16	h_i_mode_high;
			__u16	h_i_uid_high;
			__u16	h_i_gid_high;
			__u32	h_i_author;
		} hurd2;
		struct {
691
			__le16	h_i_reserved1;	/* Obsoleted fragment number/size which are removed in ext4 */
692
			__le16	m_i_file_acl_high;
693
694
695
696
			__u32	m_i_reserved2[2];
		} masix2;
	} osd2;				/* OS dependent 2 */
	__le16	i_extra_isize;
697
	__le16	i_checksum_hi;	/* crc32c(uuid+inum+inode) BE */
Kalpak Shah's avatar
Kalpak Shah committed
698
699
700
701
702
	__le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
	__le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
	__le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
	__le32  i_crtime;       /* File Creation time */
	__le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
703
	__le32  i_version_hi;	/* high 32 bits for 64-bit version */
704
705
};

706
707
708
709
710
711
712
713
/* Argument structure of the EXT4_IOC_MOVE_EXT ioctl. */
struct move_extent {
	__u32 reserved;		/* should be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start offset in block for orig */
	__u64 donor_start;	/* logical start offset in block for donor */
	__u64 len;		/* block length to be moved */
	__u64 moved_len;	/* moved block length */
};
714

Kalpak Shah's avatar
Kalpak Shah committed
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)

/*
 * Extended fields will fit into an inode if the filesystem was formatted
 * with large inodes (-I 256 or larger) and there are not currently any EAs
 * consuming all of the available space. For new inodes we always reserve
 * enough space for the kernel's known extended fields, but for inodes
 * created with an old kernel this might not have been the case. None of
 * the extended inode fields is critical for correct filesystem operation.
 * This macro checks if a certain field fits in the inode. Note that
 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
 *
 * @ext4_inode: pointer to the raw on-disk inode (only its type is used)
 * @einode:     object whose i_extra_isize gives the extra space available
 * @field:      name of the raw inode member to test
 *
 * Evaluates to true when the last byte of @field lies within
 * EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize bytes.
 */
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field)	\
	((offsetof(typeof(*(ext4_inode)), field) +	\
	  sizeof((ext4_inode)->field))			\
	<= (EXT4_GOOD_OLD_INODE_SIZE +			\
	    (einode)->i_extra_isize))

/*
 * Build the on-disk "extra" word for a timestamp: the low EXT4_EPOCH_BITS
 * bits hold the epoch, the remaining bits hold tv_nsec.
 *
 * The 32-bit on-disk seconds field is read back as a signed value, so the
 * epoch must be the number of 2^32-second steps between that sign-extended
 * value and the real tv_sec.  That is (tv_sec - (s32)tv_sec) >> 32, not
 * tv_sec >> 32: the latter yields epoch 0 for 2^31 <= tv_sec < 2^32 (which
 * then decodes as a negative time) and epoch 3 for small negative times.
 */
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
	__u32 epoch = 0;

	if (sizeof(time->tv_sec) > 4)
		epoch = ((time->tv_sec - (__s32)time->tv_sec) >> 32) &
			EXT4_EPOCH_MASK;

	return cpu_to_le32(epoch |
			   ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK));
}

/*
 * Apply the on-disk "extra" word to a timestamp whose tv_sec already holds
 * the sign-extended 32-bit on-disk seconds value.
 *
 * The epoch is added (not OR-ed) so that it combines correctly with a
 * negative, sign-extended tv_sec.  Inodes written by the previous encoder
 * carry epoch bits 1,1 for pre-1970 times; that combination is recognised
 * (epoch == 3 with bit 31 of the seconds set) and treated as epoch 0, at
 * the price of not distinguishing it from a genuine epoch-3 time whose low
 * 32 bits have bit 31 set.
 */
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
	__u32 raw = le32_to_cpu(extra);

	if (sizeof(time->tv_sec) > 4) {
		__u64 epoch = raw & EXT4_EPOCH_MASK;

		if (epoch == EXT4_EPOCH_MASK && (time->tv_sec & 0x80000000))
			epoch = 0;	/* legacy encoding of a pre-1970 time */
		time->tv_sec += epoch << 32;
	}
	time->tv_nsec = (raw & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
}

/*
 * Timestamp transfer helpers between the in-memory inode and the raw
 * on-disk inode.  'xtime' is the member name (it is token-pasted with
 * "_extra" to reach the companion field).
 *
 * The INODE variants operate on a field of 'inode' and always touch the
 * 32-bit seconds field; the EINODE variants operate on a field of 'einode'
 * and first check with EXT4_FITS_IN_INODE() that the seconds field itself
 * is present.  All variants only touch the "_extra" field when it fits,
 * and the GET variants clear tv_nsec when it does not.
 */
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)			       \
do {									       \
	(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);	       \
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
		(raw_inode)->xtime ## _extra =				       \
				ext4_encode_extra_time(&(inode)->xtime);       \
} while (0)

#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)			       \
do {									       \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
		(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
		(raw_inode)->xtime ## _extra =				       \
				ext4_encode_extra_time(&(einode)->xtime);      \
} while (0)

#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)			       \
do {									       \
	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);       \
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
		ext4_decode_extra_time(&(inode)->xtime,			       \
				       (raw_inode)->xtime ## _extra);	       \
	else								       \
		(inode)->xtime.tv_nsec = 0;				       \
} while (0)

#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)			       \
do {									       \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
		(einode)->xtime.tv_sec = 				       \
			(signed)le32_to_cpu((raw_inode)->xtime);	       \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
		ext4_decode_extra_time(&(einode)->xtime,		       \
				       (raw_inode)->xtime ## _extra);	       \
	else								       \
		(einode)->xtime.tv_nsec = 0;				       \
} while (0)

789
790
#define i_disk_version osd1.linux1.l_i_version

791
792
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1	osd1.linux1.l_i_reserved1
793
#define i_file_acl_high	osd2.linux2.l_i_file_acl_high
794
#define i_blocks_high	osd2.linux2.l_i_blocks_high
795
796
797
798
#define i_uid_low	i_uid
#define i_gid_low	i_gid
#define i_uid_high	osd2.linux2.l_i_uid_high
#define i_gid_high	osd2.linux2.l_i_gid_high
799
#define i_checksum_lo	osd2.linux2.l_i_checksum_lo
800
801
802
803
804
805
806
807
808
809
810

#elif defined(__GNU__)

#define i_translator	osd1.hurd1.h_i_translator
#define i_uid_high	osd2.hurd2.h_i_uid_high
#define i_gid_high	osd2.hurd2.h_i_gid_high
#define i_author	osd2.hurd2.h_i_author

#elif defined(__masix__)

#define i_reserved1	osd1.masix1.m_i_reserved1
811
#define i_file_acl_high	osd2.masix2.m_i_file_acl_high
812
813
814
815
#define i_reserved2	osd2.masix2.m_i_reserved2

#endif /* defined(__KERNEL__) || defined(__linux__) */

816
817
/*
 * storage for cached extent
818
819
 * If ec_len == 0, then the cache is invalid.
 * If ec_start == 0, then the cache represents a gap (null mapping)
820
821
822
823
824
825
826
 */
/*
 * One cached extent: a start block, a logical block and a length.
 * ec_len == 0 marks the entry invalid; ec_start == 0 marks a gap
 * (null mapping).
 */
struct ext4_ext_cache {
	/* NOTE(review): presumably the first physical block of the extent - confirm */
	ext4_fsblk_t	ec_start;
	/* NOTE(review): presumably the first logical block covered - confirm */
	ext4_lblk_t	ec_block;
	__u32		ec_len; /* must be 32bit to return holes */
};

827
828
#include "extents_status.h"

829
830
831
832
833
834
/*
 * fourth extended file system inode data in memory
 */
struct ext4_inode_info {
	__le32	i_data[15];	/* unconverted */
	__u32	i_dtime;
835
	ext4_fsblk_t	i_file_acl;
836
837
838
839
840
841
842
843
844

	/*
	 * i_block_group is the number of the block group which contains
	 * this file's inode.  Constant across the lifetime of the inode,
	 * it is used for making block allocation decisions - we try to
	 * place a file's data blocks near its inode block, and new inodes
	 * near to their parent directory's inode.
	 */
	ext4_group_t	i_block_group;
845
	ext4_lblk_t	i_dir_start_lookup;
846
#if (BITS_PER_LONG < 64)
847
	unsigned long	i_state_flags;		/* Dynamic state flags */
848
#endif
849
	unsigned long	i_flags;
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892

#ifdef CONFIG_EXT4_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data. Taking i_mutex even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif

	struct list_head i_orphan;	/* unlinked but open inodes */

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
	 * set i_disksize to 0 until the truncate is actually under way.
	 *
	 * The intent is that i_disksize always represents the blocks which
	 * are used by this file.  This allows recovery to restart truncate
	 * on orphans if we crash during truncate.  We actually write i_disksize
	 * into the on-disk inode when writing inodes out, instead of i_size.
	 *
	 * The only time when i_disksize and i_size may be different is when
	 * a truncate is in progress.  The only things which change i_disksize
	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
	 */
	loff_t	i_disksize;

	/*
	 * i_data_sem is for serialising ext4_truncate() against
	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
	 * data tree are chopped off during truncate. We can't do that in
	 * ext4 because whenever we perform intermediate commits during
	 * truncate, the inode and all the metadata blocks *must* be in a
	 * consistent state which allows truncation of the orphans to restart
	 * during recovery.  Hence we must fix the get_block-vs-truncate race
	 * by other means, so we have i_data_sem.
	 */
	struct rw_semaphore i_data_sem;
	struct inode vfs_inode;
893
	struct jbd2_inode *jinode;
894
895
896
897
898
899
900
901
902
903
904
905

	struct ext4_ext_cache i_cached_extent;
	/*
	 * File creation time. Its function is same as that of
	 * struct timespec i_{a,c,m}time in the generic inode.
	 */
	struct timespec i_crtime;

	/* mballoc */
	struct list_head i_prealloc_list;
	spinlock_t i_prealloc_lock;

906
907
908
909
	/* extents status tree */
	struct ext4_es_tree i_es_tree;
	rwlock_t i_es_lock;

910
911
912
913
	/* ialloc */
	ext4_group_t	i_last_alloc_group;

	/* allocation reservation info for delalloc */
914
	/* In case of bigalloc, these refer to clusters rather than blocks */
915
916
917
	unsigned int i_reserved_data_blocks;
	unsigned int i_reserved_meta_blocks;
	unsigned int i_allocated_meta_blocks;
918
	ext4_lblk_t i_da_metadata_calc_last_lblock;
919
	int i_da_metadata_calc_len;
920
921
922
923

	/* on-disk additional length */
	__u16 i_extra_isize;

924
925
926
927
	/* Indicate the inline data space. */
	u16 i_inline_off;
	u16 i_inline_size;

928
929
930
931
#ifdef CONFIG_QUOTA
	/* quota space reservation, managed internally by quota code */
	qsize_t i_reserved_quota;
#endif
932

933
934
	/* completed IOs that might need unwritten extents handling */
	struct list_head i_completed_io_list;
935
	spinlock_t i_completed_io_lock;
936
	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
937
	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
938
939

	spinlock_t i_block_reservation_lock;
940
941
942
943
944
945
946

	/*
	 * Transactions that contain inode's metadata needed to complete
	 * fsync and fdatasync, respectively.
	 */
	tid_t i_sync_tid;
	tid_t i_datasync_tid;
947
948
949

	/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
	__u32 i_csum_seed;
950
951
};

952
953
954
/*
 * File system states
 */
955
956
957
#define	EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
#define	EXT4_ERROR_FS			0x0002	/* Errors detected */
#define	EXT4_ORPHAN_FS			0x0004	/* Orphans being recovered */
958

959
960
961
962
963
964
965
/*
 * Misc. filesystem flags
 */
#define EXT2_FLAGS_SIGNED_HASH		0x0001  /* Signed dirhash in use */
#define EXT2_FLAGS_UNSIGNED_HASH	0x0002  /* Unsigned dirhash in use */
#define EXT2_FLAGS_TEST_FILESYS		0x0004	/* to test development code */

966
967
968
/*
 * Mount flags
 */
969
970
971
972
973
#define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
#define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
#define EXT4_MOUNT_ERRORS_RO		0x00020	/* Remount fs ro on errors */
#define EXT4_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
974
#define EXT4_MOUNT_ERRORS_MASK		0x00070
975
976
977
978
979
980
981
982
983
984
#define EXT4_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
#define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
#define EXT4_MOUNT_WRITEBACK_DATA	0x00C00	/* No data ordering */
#define EXT4_MOUNT_UPDATE_JOURNAL	0x01000	/* Update the journal format */
#define EXT4_MOUNT_NO_UID32		0x02000  /* Disable 32-bit UIDs */
#define EXT4_MOUNT_XATTR_USER		0x04000	/* Extended user attributes */
#define EXT4_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
985
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC	0x10000	/* No auto delalloc mapping */
986
987
988
989
#define EXT4_MOUNT_BARRIER		0x20000 /* Use block barriers */
#define EXT4_MOUNT_QUOTA		0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
990
#define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
991
#define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
992
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
993
#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
994
#define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
995
#define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
996
#define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
997
#define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
998
#define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
999

1000
1001
1002
#define EXT4_MOUNT2_EXPLICIT_DELALLOC	0x00000001 /* User explicitly
						      specified delalloc */

1003
1004
1005
1006
/*
 * Accessors for the primary mount-option word, EXT4_SB(sb)->s_mount_opt.
 * @opt is the option name without its EXT4_MOUNT_ prefix.
 *
 *   clear_opt() - clear the option bit(s); evaluates to the new word
 *   set_opt()   - set the option bit(s); evaluates to the new word
 *   test_opt()  - evaluates to non-zero if any of the option bit(s) is set
 *
 * Each expansion is fully parenthesized so that the macros behave as a
 * single expression regardless of the operators surrounding the call.
 */
#define clear_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt &= \
						~EXT4_MOUNT_##opt)
#define set_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt |= \
						EXT4_MOUNT_##opt)
#define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
					 EXT4_MOUNT_##opt)
1009

1010
1011
1012
1013
1014
1015
1016
/*
 * Accessors for the secondary mount-option word, EXT4_SB(sb)->s_mount_opt2.
 * @opt is the option name without its EXT4_MOUNT2_ prefix.
 *
 *   clear_opt2() - clear the option bit(s); evaluates to the new word
 *   set_opt2()   - set the option bit(s); evaluates to the new word
 *   test_opt2()  - evaluates to non-zero if any of the option bit(s) is set
 *
 * Each expansion is fully parenthesized so that the macros behave as a
 * single expression regardless of the operators surrounding the call.
 */
#define clear_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 &= \
						~EXT4_MOUNT2_##opt)
#define set_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 |= \
						EXT4_MOUNT2_##opt)
#define test_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 & \
					 EXT4_MOUNT2_##opt)

1017
1018
#define ext4_test_and_set_bit		__test_and_set_bit_le
#define ext4_set_bit			__set_bit_le
1019
#define ext4_set_bit_atomic		ext2_set_bit_atomic
1020
1021
#define ext4_test_and_clear_bit		__test_and_clear_bit_le
#define ext4_clear_bit			__clear_bit_le
1022
#define ext4_clear_bit_atomic		ext2_clear_bit_atomic
Akinobu Mita's avatar
Akinobu Mita committed
1023
1024
1025
#define ext4_test_bit			test_bit_le
#define ext4_find_next_zero_bit		find_next_zero_bit_le
#define ext4_find_next_bit		find_next_bit_le
1026

1027
1028
/*
 * Declared here; the definition is not in this header.
 * NOTE(review): presumably sets @len consecutive bits starting at bit @cur
 * in the bitmap @bm - confirm against the definition.
 */
extern void ext4_set_bits(void *bm, int cur, int len);

1029
1030
1031
/*
 * Maximal mount counts between two filesystem checks
 */
1032
1033
#define EXT4_DFL_MAX_MNT_COUNT		20	/* Allow 20 mounts */
#define EXT4_DFL_CHECKINTERVAL		0	/* Don't use interval check */
1034
1035
1036
1037

/*
 * Behaviour when detecting errors
 */
1038
1039
1040
1041
#define EXT4_ERRORS_CONTINUE		1	/* Continue execution */
#define EXT4_ERRORS_RO			2	/* Remount fs read-only */
#define EXT4_ERRORS_PANIC		3	/* Panic */
#define EXT4_ERRORS_DEFAULT		EXT4_ERRORS_CONTINUE
1042

1043
1044
1045
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM		1

1046
1047
1048
/*
 * Structure of the super block
 */
1049
struct ext4_super_block {
1050
/*00*/	__le32	s_inodes_count;		/* Inodes count */
1051
	__le32	s_blocks_count_lo;	/* Blocks count */
1052
1053
	__le32	s_r_blocks_count_lo;	/* Reserved blocks count */
	__le32	s_free_blocks_count_lo;	/* Free blocks count */
1054
1055
1056
/*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
	__le32	s_first_data_block;	/* First Data Block */
	__le32	s_log_block_size;	/* Block size */
1057
	__le32	s_log_cluster_size;	/* Allocation cluster size */
1058
/*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
1059
	__le32	s_clusters_per_group;	/* # Clusters per group */
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
	__le32	s_inodes_per_group;	/* # Inodes per group */
	__le32	s_mtime;		/* Mount time */
/*30*/	__le32	s_wtime;		/* Write time */
	__le16	s_mnt_count;		/* Mount count */
	__le16	s_max_mnt_count;	/* Maximal mount count */
	__le16	s_magic;		/* Magic signature */
	__le16	s_state;		/* File system state */
	__le16	s_errors;		/* Behaviour when detecting errors */
	__le16	s_minor_rev_level;	/* minor revision level */
/*40*/	__le32	s_lastcheck;		/* time of last check */
	__le32	s_checkinterval;	/* max. time between checks */
	__le32	s_creator_os;		/* OS */
	__le32	s_rev_level;		/* Revision level */
/*50*/	__le16	s_def_resuid;		/* Default uid for reserved blocks */
	__le16	s_def_resgid;		/* Default gid for reserved blocks */
	/*
1076
	 * These fields are for EXT4_DYNAMIC_REV superblocks only.
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
	 *
	 * Note: the difference between the compatible feature set and
	 * the incompatible feature set is that if there is a bit set
	 * in the incompatible feature set that the kernel doesn't
	 * know about, it should refuse to mount the filesystem.
	 *
	 * e2fsck's requirements are more strict; if it doesn't know
	 * about a feature in either the compatible or incompatible
	 * feature set, it must abort and not try to meddle with
	 * things it doesn't understand...
	 */
	__le32	s_first_ino;		/* First non-reserved inode */
1089
	__le16  s_inode_size;		/* size of inode structure */
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
	__le16	s_block_group_nr;	/* block group # of this superblock */
	__le32	s_feature_compat;	/* compatible feature set */
/*60*/	__le32	s_feature_incompat;	/* incompatible feature set */
	__le32	s_feature_ro_compat;	/* readonly-compatible feature set */
/*68*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
/*78*/	char	s_volume_name[16];	/* volume name */
/*88*/	char	s_last_mounted[64];	/* directory where last mounted */
/*C8*/	__le32	s_algorithm_usage_bitmap; /* For compression */
	/*
	 * Performance hints.  Directory preallocation should only
1100
	 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
1101
1102
1103
1104
1105
	 */
	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
	/*
1106
	 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
1107
1108
1109
1110
1111
1112
1113
	 */
/*D0*/	__u8	s_journal_uuid[16];	/* uuid of journal superblock */
/*E0*/	__le32	s_journal_inum;		/* inode number of journal file */
	__le32	s_journal_dev;		/* device number of journal file */
	__le32	s_last_orphan;		/* start of list of inodes to delete */
	__le32	s_hash_seed[4];		/* HTREE hash seed */
	__u8	s_def_hash_version;	/* Default hash version to use */
1114
	__u8	s_jnl_backup_type;
1115
	__le16  s_desc_size;		/* size of group descriptor */
Laurent Vivier's avatar
Laurent Vivier committed
1116
/*100*/	__le32	s_default_mount_opts;
1117
	__le32	s_first_meta_bg;	/* First metablock block group */
Laurent Vivier's avatar
Laurent Vivier committed
1118
1119
1120
1121
1122
1123
	__le32	s_mkfs_time;		/* When the filesystem was created */
	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
	/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
	__le32	s_free_blocks_count_hi;	/* Free blocks count */
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1124
1125
1126
1127
	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
	__le32	s_flags;		/* Miscellaneous flags */
	__le16  s_raid_stride;		/* RAID stride */
1128
	__le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1129
1130
	__le64  s_mmp_block;            /* Block for multi-mount protection */
	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
1131
	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
1132
	__u8	s_checksum_type;	/* metadata checksum algorithm used */
1133
	__le16  s_reserved_pad;
1134
	__le64	s_kbytes_written;	/* nr of lifetime kilobytes written */
1135
1136
1137
1138
1139
1140
	__le32	s_snapshot_inum;	/* Inode number of active snapshot */
	__le32	s_snapshot_id;		/* sequential ID of active snapshot */
	__le64	s_snapshot_r_blocks_count; /* reserved blocks for active
					      snapshot's future use */
	__le32	s_snapshot_list;	/* inode number of the head of the
					   on-disk snapshot list */
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
	__le32	s_error_count;		/* number of fs errors */
	__le32	s_first_error_time;	/* first time an error happened */
	__le32	s_first_error_ino;	/* inode involved in first error */
	__le64	s_first_error_block;	/* block involved of first error */
	__u8	s_first_error_func[32];	/* function where the error happened */
	__le32	s_first_error_line;	/* line number where error happened */
	__le32	s_last_error_time;	/* most recent time of an error */
	__le32	s_last_error_ino;	/* inode involved in last error */
	__le32	s_last_error_line;	/* line number where error happened */
	__le64	s_last_error_block;	/* block involved of last error */
	__u8	s_last_error_func[32];	/* function where the error happened */
1153
1154
#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
	__u8	s_mount_opts[64];
1155
1156
1157
	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
1158
1159
	__le32	s_reserved[108];	/* Padding to the end of the block */
	__le32	s_checksum;		/* crc32c(superblock) */
1160
1161
};

1162
1163
/*
 * Size in bytes of the error-information region of struct ext4_super_block:
 * from the offset of s_error_count (EXT4_S_ERR_START) up to, but not
 * including, the offset of s_mount_opts (EXT4_S_ERR_END).
 */
#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)

1164
#ifdef __KERNEL__
1165
1166

/*
1167
 * run-time mount flags
1168
1169
 */
#define EXT4_MF_MNTDIR_SAMPLED	0x0001
1170
#define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */
1171

1172
1173
1174
1175
1176
1177
1178
/*
 * fourth extended-fs super-block data in memory
 */
struct ext4_sb_info {
	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
	unsigned long s_inodes_per_block;/* Number of inodes per block */
	unsigned long s_blocks_per_group;/* Number of blocks in a group */
1179
	unsigned long s_clusters_per_group; /* Number of clusters in a group */
1180
1181
1182
1183
1184
	unsigned long s_inodes_per_group;/* Number of inodes in a group */
	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
	ext4_group_t s_groups_count;	/* Number of groups in the fs */
1185
	ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
1186
	unsigned long s_overhead;  /* # of fs overhead clusters */
1187
1188
	unsigned int s_cluster_ratio;	/* Number of blocks per cluster */
	unsigned int s_cluster_bits;	/* log2 of s_cluster_ratio */
1189
1190
1191
1192
	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
	struct buffer_head * s_sbh;	/* Buffer containing the super block */
	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
	struct buffer_head **s_group_desc;
1193
	unsigned int s_mount_opt;
1194
	unsigned int s_mount_opt2;
1195
	unsigned int s_mount_flags;
1196
	unsigned int s_def_mount_opt;
1197
	ext4_fsblk_t s_sb_block;
1198
1199
	kuid_t s_resuid;
	kgid_t s_resgid;
1200
1201
1202
1203
1204
1205
1206
	unsigned short s_mount_state;
	unsigned short s_pad;
	int s_addr_per_block_bits;
	int s_desc_per_block_bits;
	int s_inode_size;
	int s_first_ino;
	unsigned int s_inode_readahead_blks;
1207
	unsigned int s_inode_goal;
1208
1209
1210
1211
1212
	spinlock_t s_next_gen_lock;
	u32 s_next_generation;
	u32 s_hash_seed[4];
	int s_def_hash_version;
	int s_hash_unsigned;	/* 3 if hash should be unsigned, 0 if not */
1213
	struct percpu_counter s_freeclusters_counter;