super.c 149 KB
Newer Older
1
/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
23
#include <linux/vmalloc.h>
24
#include <linux/jbd2.h>
25
26
27
28
29
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
30
#include <linux/exportfs.h>
31
32
33
34
35
36
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
37
#include <linux/proc_fs.h>
Theodore Ts'o's avatar
Theodore Ts'o committed
38
#include <linux/ctype.h>
Vignesh Babu's avatar
Vignesh Babu committed
39
#include <linux/log2.h>
40
#include <linux/crc16.h>
Dan Magenheimer's avatar
Dan Magenheimer committed
41
#include <linux/cleancache.h>
42
43
#include <asm/uaccess.h>

44
45
46
#include <linux/kthread.h>
#include <linux/freezer.h>

47
#include "ext4.h"
48
#include "ext4_extents.h"
49
#include "ext4_jbd2.h"
50
51
#include "xattr.h"
#include "acl.h"
52
#include "mballoc.h"
53

54
55
56
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

57
/* File-scope state shared by the functions in this file. */
static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat;

/* Forward declarations: journal and superblock handling. */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);
static const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);

/* Forward declarations: superblock-level operations. */
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data);

/* Forward declarations: feature-set checks. */
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static int ext4_feature_set_ok(struct super_block *sb, int readonly);

/* Forward declarations: lazy-init request handling. */
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
86

87
88
89
90
91
92
93
94
95
96
97
98
99
100
/*
 * When CONFIG_EXT4_USE_FOR_EXT23 is set and no ext2 driver (built-in or
 * module) is configured, define an "ext2" filesystem type whose mount
 * entry point is ext4_mount().  IS_EXT2_SB() then tests whether the
 * superblock's block device is held by that type; otherwise it is 0.
 */
#if defined(CONFIG_EXT4_USE_FOR_EXT23) && !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE)
static struct file_system_type ext2_fs_type = {
	.name		= "ext2",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV,
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
};
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif


101
102
103
104
/*
 * When CONFIG_EXT4_USE_FOR_EXT23 is set and no ext3 driver (built-in or
 * module) is configured, define an "ext3" filesystem type whose mount
 * entry point is ext4_mount().  IS_EXT3_SB() then tests whether the
 * superblock's block device is held by that type; otherwise it is 0.
 */
#if defined(CONFIG_EXT4_USE_FOR_EXT23) && !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE)
static struct file_system_type ext3_fs_type = {
	.name		= "ext3",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV,
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
};
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
#define IS_EXT3_SB(sb) (0)
#endif
Laurent Vivier's avatar
Laurent Vivier committed
113

114
115
116
117
118
119
120
121
122
123
/*
 * Check the superblock's checksum algorithm field.
 *
 * Returns 1 when the METADATA_CSUM ro-compat feature is not enabled
 * (nothing to check), otherwise non-zero only if s_checksum_type is
 * EXT4_CRC32C_CHKSUM.
 */
static int ext4_verify_csum_type(struct super_block *sb,
				 struct ext4_super_block *es)
{
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return es->s_checksum_type == EXT4_CRC32C_CHKSUM;

	return 1;
}

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * Compute the superblock checksum: ext4_chksum() seeded with ~0 over the
 * bytes of *es that precede the s_checksum field, returned little-endian.
 */
static __le32 ext4_superblock_csum(struct super_block *sb,
				   struct ext4_super_block *es)
{
	int covered = offsetof(struct ext4_super_block, s_checksum);
	__u32 crc = ext4_chksum(EXT4_SB(sb), ~0, (char *)es, covered);

	return cpu_to_le32(crc);
}

/*
 * Verify the checksum stored in the superblock.
 *
 * Returns 1 when METADATA_CSUM is not enabled, otherwise non-zero only if
 * es->s_checksum matches the freshly computed value.
 */
int ext4_superblock_csum_verify(struct super_block *sb,
				struct ext4_super_block *es)
{
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return es->s_checksum == ext4_superblock_csum(sb, es);

	return 1;
}

/*
 * Recompute and store es->s_checksum.  Does nothing unless the
 * METADATA_CSUM ro-compat feature is enabled.
 */
void ext4_superblock_csum_set(struct super_block *sb,
			      struct ext4_super_block *es)
{
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		es->s_checksum = ext4_superblock_csum(sb, es);
}

156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
 * Allocate @size bytes, trying kmalloc() first and falling back to
 * __vmalloc() if that fails.
 *
 * The kmalloc() attempt is made with __GFP_NOWARN: its failure is handled
 * by the fallback, so it should not produce an allocation-failure warning.
 * The fallback uses the caller's flags unchanged, so a failure of both
 * attempts is still reported as the caller requested.
 *
 * Returns the allocation or NULL.  Release with ext4_kvfree().
 */
void *ext4_kvmalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = kmalloc(size, flags | __GFP_NOWARN);
	if (!ret)
		ret = __vmalloc(size, flags, PAGE_KERNEL);
	return ret;
}

/*
 * Zeroing variant of ext4_kvmalloc(): try kzalloc() first and fall back
 * to __vmalloc() with __GFP_ZERO if that fails.
 *
 * The kzalloc() attempt is made with __GFP_NOWARN: its failure is handled
 * by the fallback, so it should not produce an allocation-failure warning.
 *
 * Returns the zeroed allocation or NULL.  Release with ext4_kvfree().
 */
void *ext4_kvzalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = kzalloc(size, flags | __GFP_NOWARN);
	if (!ret)
		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
	return ret;
}

/*
 * Free memory obtained from ext4_kvmalloc()/ext4_kvzalloc(), choosing
 * vfree() or kfree() according to is_vmalloc_addr().
 */
void ext4_kvfree(void *ptr)
{
	if (!is_vmalloc_addr(ptr)) {
		kfree(ptr);
		return;
	}
	vfree(ptr);
}

185
186
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
187
{
188
	return le32_to_cpu(bg->bg_block_bitmap_lo) |
189
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
190
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
191
192
}

193
194
ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
195
{
196
	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
197
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
198
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
199
200
}

201
202
ext4_fsblk_t ext4_inode_table(struct super_block *sb,
			      struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
203
{
204
	return le32_to_cpu(bg->bg_inode_table_lo) |
205
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
206
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
207
208
}

209
210
__u32 ext4_free_group_clusters(struct super_block *sb,
			       struct ext4_group_desc *bg)
211
212
213
{
	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
214
		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
215
216
217
218
219
220
221
}

__u32 ext4_free_inodes_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
222
		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
223
224
225
226
227
228
229
}

__u32 ext4_used_dirs_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
230
		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
231
232
233
234
235
236
237
}

__u32 ext4_itable_unused_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_itable_unused_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
238
		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
239
240
}

241
242
/*
 * Store @blk as the block bitmap location in @bg.  The high 32 bits are
 * written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

249
250
/*
 * Store @blk as the inode bitmap location in @bg.  The high 32 bits are
 * written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

257
258
/*
 * Store @blk as the inode table location in @bg.  The high 32 bits are
 * written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

265
266
/*
 * Store @count in bg_free_blocks_count_lo/_hi of @bg.  The upper 16 bits
 * are written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store @count in bg_free_inodes_count_lo/_hi of @bg.  The upper 16 bits
 * are written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store @count in bg_used_dirs_count_lo/_hi of @bg.  The upper 16 bits
 * are written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_used_dirs_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store @count in bg_itable_unused_lo/_hi of @bg.  The upper 16 bits are
 * written only when the descriptor size is at least
 * EXT4_MIN_DESC_SIZE_64BIT.
 */
void ext4_itable_unused_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

/*
 * No-journal handle acquisition: current->journal_info is used as a plain
 * counter rather than a pointer.  Bump it by one and return the new value
 * cast to a handle.  BUGs if the counter has already reached
 * EXT4_NOJOURNAL_MAX_REF_COUNT.
 */
static handle_t *ext4_get_nojournal(void)
{
	unsigned long refs = (unsigned long)current->journal_info;
	handle_t *handle;

	BUG_ON(refs >= EXT4_NOJOURNAL_MAX_REF_COUNT);

	handle = (handle_t *)(refs + 1);
	current->journal_info = handle;

	return handle;
}


/*
 * No-journal handle release: treat @handle as a counter, decrement it and
 * store the result in current->journal_info.  BUGs if the counter is
 * already zero.
 */
static void ext4_put_nojournal(handle_t *handle)
{
	unsigned long refs = (unsigned long)handle;

	BUG_ON(refs == 0);

	current->journal_info = (handle_t *)(refs - 1);
}

327
/*
328
 * Wrappers for jbd2_journal_start/end.
329
 */
330
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
331
332
333
{
	journal_t *journal;

334
	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
335
336
337
	if (sb->s_flags & MS_RDONLY)
		return ERR_PTR(-EROFS);

338
	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
339
	journal = EXT4_SB(sb)->s_journal;
340
341
342
343
344
345
346
347
348
349
	if (!journal)
		return ext4_get_nojournal();
	/*
	 * Special case here: if the journal has aborted behind our
	 * backs (eg. EIO in the commit thread), then we still need to
	 * take the FS itself readonly cleanly.
	 */
	if (is_journal_aborted(journal)) {
		ext4_abort(sb, "Detected aborted journal");
		return ERR_PTR(-EROFS);
350
	}
351
	return jbd2_journal_start(journal, nblocks);
352
353
}

354
/*
 * Stop @handle.
 *
 * For a no-journal handle, drop the counter reference and return 0.
 * Otherwise call jbd2_journal_stop(); the result is the handle's recorded
 * h_err if non-zero, else the return value of jbd2_journal_stop().  A
 * non-zero result is reported through __ext4_std_error() with @where and
 * @line identifying the caller.
 */
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
	struct super_block *sb;
	int handle_err;
	int stop_err;
	int err;

	if (!ext4_handle_valid(handle)) {
		ext4_put_nojournal(handle);
		return 0;
	}

	/* Read everything needed from the handle before stopping it. */
	sb = handle->h_transaction->t_journal->j_private;
	handle_err = handle->h_err;
	stop_err = jbd2_journal_stop(handle);

	err = handle_err ? handle_err : stop_err;
	if (err)
		__ext4_std_error(sb, where, line, err);

	return err;
}

375
376
377
/*
 * Record @err on @handle (if it has no error yet) and abort the handle.
 *
 * @caller/@line identify the call site and @err_fn the failing function;
 * they are only used for the log message.  Nothing is logged or aborted
 * again if the handle is already aborted.  BUGs on a no-journal handle.
 */
void ext4_journal_abort_handle(const char *caller, unsigned int line,
			       const char *err_fn, struct buffer_head *bh,
			       handle_t *handle, int err)
{
	char nbuf[16];
	const char *errstr;

	BUG_ON(!ext4_handle_valid(handle));

	if (bh)
		BUFFER_TRACE(bh, "abort");

	if (!handle->h_err)
		handle->h_err = err;

	if (is_handle_aborted(handle))
		return;

	errstr = ext4_decode_error(NULL, err, nbuf);
	printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
	       caller, line, errstr, err_fn);

	jbd2_journal_abort_handle(handle);
}

399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
static void __save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	es->s_last_error_time = cpu_to_le32(get_seconds());
	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
	es->s_last_error_line = cpu_to_le32(line);
	if (!es->s_first_error_time) {
		es->s_first_error_time = es->s_last_error_time;
		strncpy(es->s_first_error_func, func,
			sizeof(es->s_first_error_func));
		es->s_first_error_line = cpu_to_le32(line);
		es->s_first_error_ino = es->s_last_error_ino;
		es->s_first_error_block = es->s_last_error_block;
	}
417
418
419
420
421
422
	/*
	 * Start the daily error reporting function if it hasn't been
	 * started already
	 */
	if (!es->s_error_count)
		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
423
424
425
426
427
428
429
430
431
432
	es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
}

/*
 * Record an error in the superblock via __save_error_info() and then
 * commit the superblock with ext4_commit_super(sb, 1).
 */
static void save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	__save_error_info(sb, func, line);

	ext4_commit_super(sb, 1);
}

433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
/*
 * The del_gendisk() function uninitializes the disk-specific data
 * structures, including the bdi structure, without telling anyone
 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 * This is a kludge to prevent these oops until we can put in a proper
 * hook in del_gendisk() to inform the VFS and file system layers.
 */
static int block_device_ejected(struct super_block *sb)
{
	struct inode *bd_inode = sb->s_bdev->bd_inode;
	struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;

	return bdi->dev == NULL;
}

Bobi Jam's avatar
Bobi Jam committed
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
/*
 * Run every callback queued on @txn's private list.
 *
 * Each entry is unlinked under s_md_lock and its jce_func is then invoked
 * with the lock dropped; the error argument is non-zero if the journal
 * has been aborted.
 *
 * Because the lock is released around each callback, the list may be
 * modified by anyone else taking s_md_lock while we are unlocked.  A
 * "next" pointer cached by list_for_each_entry_safe() could therefore be
 * stale by the time the lock is re-taken, so the first entry is re-read
 * from the list head on every iteration instead.
 */
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block		*sb = journal->j_private;
	struct ext4_sb_info		*sbi = EXT4_SB(sb);
	int				error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry	*jce;

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}
465

466
467
468
469
/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
470
 * superblock.  That is not possible on ext4, because we may have other
471
472
473
474
475
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
476
 * We'll just use the jbd2_journal_abort() error code to record an error in
477
 * the journal instead.  On recovery, the journal will complain about
478
479
480
 * that error until we've noted it down and cleared it.
 */

481
static void ext4_handle_error(struct super_block *sb)
482
483
484
485
{
	if (sb->s_flags & MS_RDONLY)
		return;

486
	if (!test_opt(sb, ERRORS_CONT)) {
487
		journal_t *journal = EXT4_SB(sb)->s_journal;
488

489
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
490
		if (journal)
491
			jbd2_journal_abort(journal, -EIO);
492
	}
493
	if (test_opt(sb, ERRORS_RO)) {
494
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
495
496
497
		sb->s_flags |= MS_RDONLY;
	}
	if (test_opt(sb, ERRORS_PANIC))
498
		panic("EXT4-fs (device %s): panic forced after error\n",
499
500
501
			sb->s_id);
}

502
void __ext4_error(struct super_block *sb, const char *function,
503
		  unsigned int line, const char *fmt, ...)
504
{
Joe Perches's avatar
Joe Perches committed
505
	struct va_format vaf;
506
507
508
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
509
510
511
512
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
	       sb->s_id, function, line, current->comm, &vaf);
513
	va_end(args);
514
	save_error_info(sb, function, line);
515

516
	ext4_handle_error(sb);
517
518
}

519
520
/*
 * Report an error tied to @inode (and optionally to @block).
 *
 * The inode number and block are stored as the superblock's "last error"
 * inode/block and the error is saved before the message is logged.  The
 * block number appears in the message only when @block is non-zero.
 * Finally the configured error policy is applied.
 */
void ext4_error_inode(struct inode *inode, const char *function,
		      unsigned int line, ext4_fsblk_t block,
		      const char *fmt, ...)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct va_format msg;
	va_list ap;

	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	es->s_last_error_block = cpu_to_le64(block);
	save_error_info(sb, function, line);

	va_start(ap, fmt);
	msg.fmt = fmt;
	msg.va = &ap;
	if (!block)
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
		       "inode #%lu: comm %s: %pV\n",
		       sb->s_id, function, line, inode->i_ino,
		       current->comm, &msg);
	else
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
		       "inode #%lu: block %llu: comm %s: %pV\n",
		       sb->s_id, function, line, inode->i_ino,
		       block, current->comm, &msg);
	va_end(ap);

	ext4_handle_error(sb);
}

548
/*
 * Report an error tied to an open @file (and optionally to @block).
 *
 * Like ext4_error_inode(), but the message also carries the file's path
 * as resolved by d_path() into an 80-byte buffer; "(unknown)" is printed
 * if d_path() fails.  Only the inode number is stored in the superblock's
 * "last error" fields here.
 */
void ext4_error_file(struct file *file, const char *function,
		     unsigned int line, ext4_fsblk_t block,
		     const char *fmt, ...)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct va_format msg;
	va_list ap;
	char pathname[80], *path;

	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	save_error_info(sb, function, line);

	path = d_path(&(file->f_path), pathname, sizeof(pathname));
	if (IS_ERR(path))
		path = "(unknown)";

	va_start(ap, fmt);
	msg.fmt = fmt;
	msg.va = &ap;
	if (!block)
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
		       "comm %s: path %s: %pV\n",
		       sb->s_id, function, line, inode->i_ino,
		       current->comm, path, &msg);
	else
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
		       "block %llu: comm %s: path %s: %pV\n",
		       sb->s_id, function, line, inode->i_ino,
		       block, current->comm, path, &msg);
	va_end(ap);

	ext4_handle_error(sb);
}

584
static const char *ext4_decode_error(struct super_block *sb, int errno,
585
586
587
588
589
590
591
592
593
594
595
596
				     char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
597
598
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}

618
/* __ext4_std_error decodes expected errors from journaling functions
619
620
 * automatically and invokes the appropriate error response.  */

621
622
void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
623
624
625
626
627
628
629
630
631
632
633
{
	char nbuf[16];
	const char *errstr;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL &&
	    (sb->s_flags & MS_RDONLY))
		return;

634
	errstr = ext4_decode_error(sb, errno, nbuf);
635
636
	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
	       sb->s_id, function, line, errstr);
637
	save_error_info(sb, function, line);
638

639
	ext4_handle_error(sb);
640
641
642
}

/*
643
 * ext4_abort is a much stronger failure handler than ext4_error.  The
644
645
646
647
648
649
650
651
 * abort function may be used to deal with unrecoverable failures such
 * as journal IO errors or ENOMEM at a critical moment in log management.
 *
 * We unconditionally force the filesystem into an ABORT|READONLY state,
 * unless the error response on the fs has been set to panic in which
 * case we take the easy way out and panic immediately.
 */

652
void __ext4_abort(struct super_block *sb, const char *function,
653
		unsigned int line, const char *fmt, ...)
654
655
656
{
	va_list args;

657
	save_error_info(sb, function, line);
658
	va_start(args, fmt);
659
660
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
	       function, line);
661
662
663
664
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

665
666
667
668
669
670
671
672
	if ((sb->s_flags & MS_RDONLY) == 0) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		sb->s_flags |= MS_RDONLY;
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		if (EXT4_SB(sb)->s_journal)
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
		save_error_info(sb, function, line);
	}
673
	if (test_opt(sb, ERRORS_PANIC))
674
		panic("EXT4-fs panic from previous error\n");
675
676
}

Joe Perches's avatar
Joe Perches committed
677
void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
678
{
Joe Perches's avatar
Joe Perches committed
679
	struct va_format vaf;
680
681
682
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
683
684
685
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
686
687
688
	va_end(args);
}

689
void __ext4_warning(struct super_block *sb, const char *function,
690
		    unsigned int line, const char *fmt, ...)
691
{
Joe Perches's avatar
Joe Perches committed
692
	struct va_format vaf;
693
694
695
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
696
697
698
699
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
700
701
702
	va_end(args);
}

703
704
705
706
/*
 * Report an error for block group @grp.  The sparse annotations say the
 * group bitlock is released and re-acquired here; that happens only on
 * the path that calls ext4_handle_error().
 *
 * @ino and @block are stored as the superblock's "last error" inode and
 * block and are included in the message only when non-zero.  The error
 * is recorded with __save_error_info(), which does not itself commit
 * the superblock.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct va_format msg;
	va_list ap;

	es->s_last_error_ino = cpu_to_le32(ino);
	es->s_last_error_block = cpu_to_le64(block);
	__save_error_info(sb, function, line);

	va_start(ap, fmt);
	msg.fmt = fmt;
	msg.va = &ap;
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
	       sb->s_id, function, line, grp);
	if (ino)
		printk(KERN_CONT "inode %lu: ", ino);
	if (block)
		printk(KERN_CONT "block %llu:", (unsigned long long) block);
	printk(KERN_CONT "%pV\n", &msg);
	va_end(ap);

	/* errors=continue: just write the superblock and carry on. */
	if (test_opt(sb, ERRORS_CONT)) {
		ext4_commit_super(sb, 0);
		return;
	}

	/*
	 * Otherwise drop the group lock around ext4_handle_error().
	 * Relocking the group afterwards may be dangerous, but nothing
	 * bad will happen since the filesystem will have already been
	 * marked read/only and the journal has been aborted.
	 */
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	ext4_lock_group(sb, grp);
}

753
void ext4_update_dynamic_rev(struct super_block *sb)
754
{
755
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
756

757
	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
758
759
		return;

760
	ext4_warning(sb,
761
762
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
763
		     EXT4_DYNAMIC_REV);
764

765
766
767
	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
768
769
770
771
772
773
774
775
776
777
778
779
780
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

/*
 * Open the external journal device
 */
781
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
782
783
784
785
{
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

786
	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
787
788
789
790
791
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
792
	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
793
794
795
796
797
798
799
			__bdevname(dev, b), PTR_ERR(bdev));
	return NULL;
}

/*
 * Release the journal device
 */
800
static int ext4_blkdev_put(struct block_device *bdev)
801
{
802
	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
803
804
}

805
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
806
807
808
809
810
811
{
	struct block_device *bdev;
	int ret = -ENODEV;

	bdev = sbi->journal_bdev;
	if (bdev) {
812
		ret = ext4_blkdev_put(bdev);
813
814
815
816
817
818
819
		sbi->journal_bdev = NULL;
	}
	return ret;
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
820
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
821
822
}

823
static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
824
825
826
{
	struct list_head *l;

827
828
	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));
829
830
831
832
833
834
835
836
837
838
839
840

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

841
static void ext4_put_super(struct super_block *sb)
842
{
843
844
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
845
	int i, err;
846

847
	ext4_unregister_li_request(sb);
848
849
	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

850
851
852
	flush_workqueue(sbi->dio_unwritten_wq);
	destroy_workqueue(sbi->dio_unwritten_wq);

853
	lock_super(sb);
854
855
856
857
	if (sbi->s_journal) {
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if (err < 0)
858
			ext4_abort(sb, "Couldn't clean up the journal");
859
	}
860

861
	del_timer(&sbi->s_err_report);
862
863
864
865
866
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);
	ext4_xattr_put_super(sb);

867
	if (!(sb->s_flags & MS_RDONLY)) {
868
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
869
870
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
871
	if (!(sb->s_flags & MS_RDONLY))
872
873
		ext4_commit_super(sb, 1);

874
	if (sbi->s_proc) {
875
		remove_proc_entry("options", sbi->s_proc);
876
		remove_proc_entry(sb->s_id, ext4_proc_root);
877
	}
Theodore Ts'o's avatar
Theodore Ts'o committed
878
	kobject_del(&sbi->s_kobj);
879
880
881

	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
882
	ext4_kvfree(sbi->s_group_desc);
883
	ext4_kvfree(sbi->s_flex_groups);
884
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
885
886
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
887
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

902
	invalidate_bdev(sb->s_bdev);
903
904
905
906
907
908
909
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
910
		invalidate_bdev(sbi->journal_bdev);
911
		ext4_blkdev_remove(sbi);
912
	}
913
914
	if (sbi->s_mmp_tsk)
		kthread_stop(sbi->s_mmp_tsk);
915
	sb->s_fs_info = NULL;
Theodore Ts'o's avatar
Theodore Ts'o committed
916
917
918
919
920
921
922
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	unlock_super(sb);
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
923
924
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
925
	kfree(sbi->s_blockgroup_lock);
926
927
928
	kfree(sbi);
}

929
static struct kmem_cache *ext4_inode_cachep;
930
931
932
933

/*
 * Called inside transaction, so use GFP_NOFS
 */
934
static struct inode *ext4_alloc_inode(struct super_block *sb)
935
{
936
	struct ext4_inode_info *ei;
937

938
	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
939
940
	if (!ei)
		return NULL;
941

942
	ei->vfs_inode.i_version = 1;
943
	ei->vfs_inode.i_data.writeback_index = 0;
944
	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
945
946
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	spin_lock_init(&ei->i_prealloc_lock);
947
948
949
	ei->i_reserved_data_blocks = 0;
	ei->i_reserved_meta_blocks = 0;
	ei->i_allocated_meta_blocks = 0;
950
	ei->i_da_metadata_calc_len = 0;
951
	spin_lock_init(&(ei->i_block_reservation_lock));
952
953
954
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
#endif
955
	ei->jinode = NULL;
956
	INIT_LIST_HEAD(&ei->i_completed_io_list);
957
	spin_lock_init(&ei->i_completed_io_lock);
958
	ei->cur_aio_dio = NULL;
959
960
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
961
	atomic_set(&ei->i_ioend_count, 0);
962
	atomic_set(&ei->i_aiodio_unwritten, 0);
963

964
965
966
	return &ei->vfs_inode;
}

967
968
969
970
971
972
973
974
/*
 * Return the decision of generic_drop_inode() for @inode, emitting the
 * ext4_drop_inode tracepoint with that decision first.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int drop;

	drop = generic_drop_inode(inode);
	trace_ext4_drop_inode(inode, drop);

	return drop;
}

Nick Piggin's avatar
Nick Piggin committed
975
976
977
978
979
980
/*
 * RCU callback: recover the inode from its i_rcu head and return the
 * containing ext4_inode_info to the inode cache.
 */
static void ext4_i_callback(struct rcu_head *head)
{
	struct inode *inode;

	inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

981
static void ext4_destroy_inode(struct inode *inode)
982
{
983
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
984
985
986
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
987
988
989
990
991
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT4_I(inode), sizeof(struct ext4_inode_info),
				true);
		dump_stack();
	}
Nick Piggin's avatar
Nick Piggin committed
992
	call_rcu(&inode->i_rcu, ext4_i_callback);
993
994
}

995
static void init_once(void *foo)
996
{
997
	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
998

999
	INIT_LIST_HEAD(&ei->i_orphan);
Theodore Ts'o's avatar
Theodore Ts'o committed
1000
#ifdef CONFIG_EXT4_FS_XATTR
For faster browsing, not all history is shown. View entire blame