	}
	return 0;
}

/*
 * Post the struct inode info into an on-disk inode location in the
 * buffer-cache.  This gobbles the caller's reference to the
 * buffer_head in the inode location struct.
 *
 * The caller must have write access to iloc->bh.
 */
static int ext4_do_update_inode(handle_t *handle,
				struct inode *inode,
				struct ext4_iloc *iloc)
{
	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct buffer_head *bh = iloc->bh;
	int err = 0, rc, block;
	int need_datasync = 0;
	uid_t i_uid;
	gid_t i_gid;

	/* For fields not tracked in the in-memory inode,
	 * initialise them to zero for new inodes. */
	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);

	ext4_get_inode_flags(ei);
	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	i_uid = i_uid_read(inode);
	i_gid = i_gid_read(inode);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
/*
 * Fix up interoperability with old kernels. Otherwise, old inodes get
 * re-used with the upper 16 bits of the uid/gid intact
 */
		if (!ei->i_dtime) {
			raw_inode->i_uid_high =
				cpu_to_le16(high_16_bits(i_uid));
			raw_inode->i_gid_high =
				cpu_to_le16(high_16_bits(i_gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);

	EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);

	if (ext4_inode_blocks_set(handle, raw_inode, ei))
		goto out_brelse;
	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
	    cpu_to_le32(EXT4_OS_HURD))
		raw_inode->i_file_acl_high =
			cpu_to_le16(ei->i_file_acl >> 32);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	if (ei->i_disksize != ext4_isize(raw_inode)) {
		ext4_isize_set(raw_inode, ei->i_disksize);
		need_datasync = 1;
	}
	if (ei->i_disksize > 0x7fffffffULL) {
		struct super_block *sb = inode->i_sb;
		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
				EXT4_SB(sb)->s_es->s_rev_level ==
				cpu_to_le32(EXT4_GOOD_OLD_REV)) {
			/* If this is the first large file
			 * created, add a flag to the superblock.
			 */
			err = ext4_journal_get_write_access(handle,
					EXT4_SB(sb)->s_sbh);
			if (err)
				goto out_brelse;
			ext4_update_dynamic_rev(sb);
			EXT4_SET_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
			ext4_handle_sync(handle);
			err = ext4_handle_dirty_super(handle, sb);
		}
	}
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		if (old_valid_dev(inode->i_rdev)) {
			raw_inode->i_block[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			raw_inode->i_block[1] = 0;
		} else {
			raw_inode->i_block[0] = 0;
			raw_inode->i_block[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			raw_inode->i_block[2] = 0;
		}
	} else
		for (block = 0; block < EXT4_N_BLOCKS; block++)
			raw_inode->i_block[block] = ei->i_data[block];

	raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
	if (ei->i_extra_isize) {
		if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
			raw_inode->i_version_hi =
			cpu_to_le32(inode->i_version >> 32);
		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
	}

	ext4_inode_csum_set(inode, raw_inode, ei);

	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	rc = ext4_handle_dirty_metadata(handle, NULL, bh);
	if (!err)
		err = rc;
	ext4_clear_inode_state(inode, EXT4_STATE_NEW);

	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
out_brelse:
	brelse(bh);
	ext4_std_error(inode->i_sb, err);
	return err;
}

/*
 * ext4_write_inode()
 *
 * We are called from a few places:
 *
 * - Within generic_file_write() for O_SYNC files.
 *   Here, there will be no transaction running. We wait for any running
 *   transaction to commit.
 *
 * - Within sys_sync(), kupdate and such.
 *   We wait on commit, if told to.
 *
 * - Within prune_icache() (PF_MEMALLOC == true)
 *   Here we simply return.  We can't afford to block kswapd on the
 *   journal commit.
 *
 * In all cases it is actually safe for us to return without doing anything,
 * because the inode has been copied into a raw inode buffer in
 * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
 * knfsd.
 *
 * Note that we are absolutely dependent upon all inode dirtiers doing the
 * right thing: they *must* call mark_inode_dirty() after dirtying info in
 * which we are interested.
 *
 * It would be a bug for them to not do this.  The code:
 *
 *	mark_inode_dirty(inode)
 *	stuff();
 *	inode->i_size = expr;
 *
 * is in error because a kswapd-driven write_inode() could occur while
 * `stuff()' is running, and the new i_size will be lost.  Plus the inode
 * will no longer be on the superblock's dirty inode list.
 */
int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int err;

	if (current->flags & PF_MEMALLOC)
		return 0;

	if (EXT4_SB(inode->i_sb)->s_journal) {
		if (ext4_journal_current_handle()) {
			jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
			dump_stack();
			return -EIO;
		}

		if (wbc->sync_mode != WB_SYNC_ALL)
			return 0;

		err = ext4_force_commit(inode->i_sb);
	} else {
		struct ext4_iloc iloc;

		err = __ext4_get_inode_loc(inode, &iloc, 0);
		if (err)
			return err;
		if (wbc->sync_mode == WB_SYNC_ALL)
			sync_dirty_buffer(iloc.bh);
		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
			EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
					 "IO error syncing inode");
			err = -EIO;
		}
		brelse(iloc.bh);
	}
	return err;
}

/*
 * ext4_setattr()
 *
 * Called from notify_change.
 *
 * We want to trap VFS attempts to truncate the file as soon as
 * possible.  In particular, we want to make sure that when the VFS
 * shrinks i_size, we put the inode on the orphan list and modify
 * i_disksize immediately, so that during the subsequent flushing of
 * dirty pages and freeing of disk blocks, we can guarantee that any
 * commit will leave the blocks being flushed in an unused state on
 * disk.  (On recovery, the inode will get truncated and the blocks will
 * be freed, so we have a strong guarantee that no future commit will
 * leave these blocks visible to the user.)
 *
 * Another thing we have to assure is that if we are in ordered mode
 * and inode is still attached to the committing transaction, we must
 * start writeout of all the dirty pages which are being truncated.
 * This way we are sure that all the data written in the previous
 * transaction are already on disk (truncate waits for pages under
 * writeback).
 *
 * Called with inode->i_mutex down.
 */
int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error, rc = 0;
	int orphan = 0;
	const unsigned int ia_valid = attr->ia_valid;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (is_quota_modification(inode, attr))
		dquot_initialize(inode);
	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
		handle_t *handle;

		/* (user+group)*(old+new) structure, inode write (sb,
		 * inode block, ? - but truncate inode update has it) */
		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
					EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		error = dquot_transfer(inode, attr);
		if (error) {
			ext4_journal_stop(handle);
			return error;
		}
		/* Update corresponding info in inode so that everything is in
		 * one transaction */
		if (attr->ia_valid & ATTR_UID)
			inode->i_uid = attr->ia_uid;
		if (attr->ia_valid & ATTR_GID)
			inode->i_gid = attr->ia_gid;
		error = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	}

	if (attr->ia_valid & ATTR_SIZE) {

		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

			if (attr->ia_size > sbi->s_bitmap_maxbytes)
				return -EFBIG;
		}
	}

	if (S_ISREG(inode->i_mode) &&
	    attr->ia_valid & ATTR_SIZE &&
	    (attr->ia_size < inode->i_size)) {
		handle_t *handle;

		handle = ext4_journal_start(inode, 3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		if (ext4_handle_valid(handle)) {
			error = ext4_orphan_add(handle, inode);
			orphan = 1;
		}
		EXT4_I(inode)->i_disksize = attr->ia_size;
		rc = ext4_mark_inode_dirty(handle, inode);
		if (!error)
			error = rc;
		ext4_journal_stop(handle);

		if (ext4_should_order_data(inode)) {
			error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
			if (error) {
				/* Do as much error cleanup as possible */
				handle = ext4_journal_start(inode, 3);
				if (IS_ERR(handle)) {
					ext4_orphan_del(NULL, inode);
					goto err_out;
				}
				ext4_orphan_del(handle, inode);
				orphan = 0;
				ext4_journal_stop(handle);
				goto err_out;
			}
		}
	}

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size != i_size_read(inode)) {
			truncate_setsize(inode, attr->ia_size);
			/* Inode size will be reduced, wait for dio in flight.
			 * Temporarily disable dioread_nolock to prevent
			 * livelock. */
			if (orphan) {
				ext4_inode_block_unlocked_dio(inode);
				inode_dio_wait(inode);
				ext4_inode_resume_unlocked_dio(inode);
			}
		}
		ext4_truncate(inode);
	}

	if (!rc) {
		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
	}

	/*
	 * If the call to ext4_truncate failed to get a transaction handle at
	 * all, we need to clean up the in-core orphan list manually.
	 */
	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	if (!rc && (ia_valid & ATTR_MODE))
		rc = ext4_acl_chmod(inode);

err_out:
	ext4_std_error(inode->i_sb, error);
	if (!error)
		error = rc;
	return error;
}

int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
		 struct kstat *stat)
{
	struct inode *inode;
	unsigned long delalloc_blocks;

	inode = dentry->d_inode;
	generic_fillattr(inode, stat);

	/*
	 * We can't update i_blocks if the block allocation is delayed;
	 * otherwise, in the case of a system crash before the real block
	 * allocation is done, we would have i_blocks inconsistent with
	 * the on-disk file blocks.
	 * We always keep i_blocks updated together with the real
	 * allocation. But to avoid confusing the user, stat
	 * will return the blocks that include the delayed allocation
	 * blocks for this file.
	 */
	delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
				EXT4_I(inode)->i_reserved_data_blocks);

	stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
	return 0;
}
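
/*
 * A worked example of the conversion above (illustrative, not from the
 * original source): delalloc_blocks counts filesystem blocks, while
 * stat->blocks counts 512-byte sectors, hence the
 * (delalloc_blocks << s_blocksize_bits) >> 9 shift. With a 4 KiB block
 * size (s_blocksize_bits == 12), each delayed block contributes
 * (1 << 12) >> 9 == 8 sectors, so 10 delayed blocks add 80 sectors.
 */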

static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return ext4_ind_trans_blocks(inode, nrblocks, chunk);
	return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
}

/*
 * Account for index blocks, block group bitmaps and block group
 * descriptor blocks if we modify data blocks and index blocks. In the
 * worst case, the index blocks are spread over different block groups.
 *
 * If data blocks are discontiguous, they can spread over
 * different block groups too. If they are contiguous, with flexbg,
 * they could still cross a block group boundary.
 *
 * Also account for superblock, inode, quota and xattr blocks
 */
static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
	int gdpblocks;
	int idxblocks;
	int ret = 0;

	/*
	 * How many index blocks do we need to touch to modify nrblocks?
	 * The "Chunk" flag indicates whether the nrblocks are
	 * physically contiguous on disk.
	 *
	 * Direct IO and fallocate call get_block to allocate
	 * a single extent at a time, so they can set the "Chunk" flag.
	 */
	idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);

	ret = idxblocks;

	/*
	 * Now let's see how many group bitmaps and group descriptors
	 * we need to account for.
	 */
	groups = idxblocks;
	if (chunk)
		groups += 1;
	else
		groups += nrblocks;

	gdpblocks = groups;
	if (groups > ngroups)
		groups = ngroups;
	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;

	/* bitmaps and block group descriptor blocks */
	ret += groups + gdpblocks;

	/* Blocks for super block, inode, quota and xattr blocks */
	ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);

	return ret;
}
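
/*
 * A worked example of the accounting above (illustrative numbers, not
 * from the original source): for a contiguous chunk (chunk == 1) whose
 * index blocks work out to idxblocks == 3, groups = 3 + 1 = 4.
 * Assuming the filesystem has at least 4 block groups and at least 4
 * group descriptor blocks, the result is
 * 3 + 4 + 4 + EXT4_META_TRANS_BLOCKS(inode->i_sb) credits.
 */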

/*
 * Calculate the total number of credits to reserve to fit
 * the modification of a single page into a single transaction,
 * which may include multiple chunks of block allocations.
 *
 * This could be called via ext4_write_begin()
 *
 * We need to consider the worst case, when
 * one new block is allocated per extent.
 */
int ext4_writepage_trans_blocks(struct inode *inode)
{
	int bpp = ext4_journal_blocks_per_page(inode);
	int ret;

	ret = ext4_meta_trans_blocks(inode, bpp, 0);

	/* Account for data blocks for journalled mode */
	if (ext4_should_journal_data(inode))
		ret += bpp;
	return ret;
}
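
/*
 * For illustration (assumed configuration, not from the original
 * source): with 4 KiB pages and 4 KiB blocks,
 * ext4_journal_blocks_per_page() returns bpp == 1, so in data=journal
 * mode the estimate is ext4_meta_trans_blocks(inode, 1, 0) + 1 credits
 * per page.
 */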

/*
 * Calculate the journal credits for a chunk of data modification.
 *
 * This is called from DIO, fallocate, or anything else that calls
 * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
 *
 * journal buffers for data blocks are not included here, as DIO
 * and fallocate do not need to journal data buffers.
 */
int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
{
	return ext4_meta_trans_blocks(inode, nrblocks, 1);
}
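
/*
 * A typical caller pattern (a sketch under the assumption that
 * nrblocks contiguous blocks are mapped in one go; not taken verbatim
 * from the original source):
 *
 *	handle = ext4_journal_start(inode,
 *				    ext4_chunk_trans_blocks(inode, nrblocks));
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	(map the chunk with ext4_map_blocks(), then ext4_journal_stop())
 */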

/*
 * The caller must have previously called ext4_reserve_inode_write().
 * Given this, we know that the caller already has write access to iloc->bh.
 */
int ext4_mark_iloc_dirty(handle_t *handle,
			 struct inode *inode, struct ext4_iloc *iloc)
{
	int err = 0;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);

	/* the do_update_inode consumes one bh->b_count */
	get_bh(iloc->bh);

	/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
	err = ext4_do_update_inode(handle, inode, iloc);
	put_bh(iloc->bh);
	return err;
}

/*
 * On success, we end up with an outstanding reference count against
 * iloc->bh.  This _must_ be cleaned up later.
 */

int
ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
			 struct ext4_iloc *iloc)
{
	int err;

	err = ext4_get_inode_loc(inode, iloc);
	if (!err) {
		BUFFER_TRACE(iloc->bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, iloc->bh);
		if (err) {
			brelse(iloc->bh);
			iloc->bh = NULL;
		}
	}
	ext4_std_error(inode->i_sb, err);
	return err;
}
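
/*
 * Typical usage of the pair above (a sketch; ext4_mark_inode_dirty()
 * below is the canonical caller):
 *
 *	struct ext4_iloc iloc;
 *	int err = ext4_reserve_inode_write(handle, inode, &iloc);
 *	if (!err) {
 *		(modify the in-core inode here)
 *		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 *	}
 *
 * which both journals the raw inode and drops the iloc.bh reference.
 */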

/*
 * Expand an inode by new_extra_isize bytes.
 * Returns 0 on success or negative error number on failure.
 */
static int ext4_expand_extra_isize(struct inode *inode,
				   unsigned int new_extra_isize,
				   struct ext4_iloc iloc,
				   handle_t *handle)
{
	struct ext4_inode *raw_inode;
	struct ext4_xattr_ibody_header *header;

	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
		return 0;

	raw_inode = ext4_raw_inode(&iloc);

	header = IHDR(inode, raw_inode);

	/* No extended attributes present */
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
			new_extra_isize);
		EXT4_I(inode)->i_extra_isize = new_extra_isize;
		return 0;
	}

	/* try to expand with EAs present */
	return ext4_expand_extra_isize_ea(inode, new_extra_isize,
					  raw_inode, handle);
}

/*
 * What we do here is to mark the in-core inode as clean with respect to inode
 * dirtiness (it may still be data-dirty).
 * This means that the in-core inode may be reaped by prune_icache
 * without having to perform any I/O.  This is a very good thing,
 * because *any* task may call prune_icache - even ones which
 * have a transaction open against a different journal.
 *
 * Is this cheating?  Not really.  Sure, we haven't written the
 * inode out, but prune_icache isn't a user-visible syncing function.
 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
 * we start and wait on commits.
 */
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
	struct ext4_iloc iloc;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	static unsigned int mnt_count;
	int err, ret;

	might_sleep();
	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (ext4_handle_valid(handle) &&
	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
		/*
		 * We need extra buffer credits since we may write into EA block
		 * with this same handle. If journal_extend fails, then it will
		 * only result in a minor loss of functionality for that inode.
		 * If this is felt to be critical, then e2fsck should be run to
		 * force a large enough s_min_extra_isize.
		 */
		if ((jbd2_journal_extend(handle,
			     EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
			ret = ext4_expand_extra_isize(inode,
						      sbi->s_want_extra_isize,
						      iloc, handle);
			if (ret) {
				ext4_set_inode_state(inode,
						     EXT4_STATE_NO_EXPAND);
				if (mnt_count !=
					le16_to_cpu(sbi->s_es->s_mnt_count)) {
					ext4_warning(inode->i_sb,
					"Unable to expand inode %lu. Delete"
					" some EAs or run e2fsck.",
					inode->i_ino);
					mnt_count =
					  le16_to_cpu(sbi->s_es->s_mnt_count);
				}
			}
		}
	}
	if (!err)
		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
	return err;
}

/*
 * ext4_dirty_inode() is called from __mark_inode_dirty()
 *
 * We're really interested in the case where a file is being extended.
 * i_size has been changed by generic_commit_write() and we thus need
 * to include the updated inode in the current transaction.
 *
 * Also, dquot_alloc_block() will always dirty the inode when blocks
 * are allocated to the file.
 *
 * If the inode is marked synchronous, we don't honour that here - doing
 * so would cause a commit on atime updates, which we don't bother doing.
 * We handle synchronous inodes at the highest possible level.
 */
void ext4_dirty_inode(struct inode *inode, int flags)
{
	handle_t *handle;

	handle = ext4_journal_start(inode, 2);
	if (IS_ERR(handle))
		goto out;

	ext4_mark_inode_dirty(handle, inode);

	ext4_journal_stop(handle);
out:
	return;
}

#if 0
/*
 * Bind an inode's backing buffer_head into this transaction, to prevent
 * it from being flushed to disk early.  Unlike
 * ext4_reserve_inode_write, this leaves behind no bh reference and
 * returns no iloc structure, so the caller needs to repeat the iloc
 * lookup to mark the inode dirty later.
 */
static int ext4_pin_inode(handle_t *handle, struct inode *inode)
{
	struct ext4_iloc iloc;

	int err = 0;
	if (handle) {
		err = ext4_get_inode_loc(inode, &iloc);
		if (!err) {
			BUFFER_TRACE(iloc.bh, "get_write_access");
			err = jbd2_journal_get_write_access(handle, iloc.bh);
			if (!err)
				err = ext4_handle_dirty_metadata(handle,
								 NULL,
								 iloc.bh);
			brelse(iloc.bh);
		}
	}
	ext4_std_error(inode->i_sb, err);
	return err;
}
#endif

int ext4_change_inode_journal_flag(struct inode *inode, int val)
{
	journal_t *journal;
	handle_t *handle;
	int err;

	/*
	 * We have to be very careful here: changing a data block's
	 * journaling status dynamically is dangerous.  If we write a
	 * data block to the journal, change the status and then delete
	 * that block, we risk forgetting to revoke the old log record
	 * from the journal and so a subsequent replay can corrupt data.
	 * So, first we make sure that the journal is empty and that
	 * nobody is changing anything.
	 */

	journal = EXT4_JOURNAL(inode);
	if (!journal)
		return 0;
	if (is_journal_aborted(journal))
		return -EROFS;
	/* We have to allocate physical blocks for delalloc blocks
	 * before flushing the journal; otherwise delalloc blocks can not
	 * be allocated any more. Even worse, a truncate on delalloc blocks
	 * could trigger a BUG by flushing delalloc blocks in the journal.
	 * There are no delalloc blocks in non-journal data mode.
	 */
	if (val && test_opt(inode->i_sb, DELALLOC)) {
		err = ext4_alloc_da_blocks(inode);
		if (err < 0)
			return err;
	}

	/* Wait for all existing dio workers */
	ext4_inode_block_unlocked_dio(inode);
	inode_dio_wait(inode);

	jbd2_journal_lock_updates(journal);

	/*
	 * OK, there are no updates running now, and all cached data is
	 * synced to disk.  We are now in a completely consistent state
	 * which doesn't have anything in the journal, and we know that
	 * no filesystem updates are running, so it is safe to modify
	 * the inode's in-core data-journaling state flag now.
	 */

	if (val)
		ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
	else {
		jbd2_journal_flush(journal);
		ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
	}
	ext4_set_aops(inode);

	jbd2_journal_unlock_updates(journal);
	ext4_inode_resume_unlocked_dio(inode);

	/* Finally we can mark the inode as dirty. */

	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_mark_inode_dirty(handle, inode);
	ext4_handle_sync(handle);
	ext4_journal_stop(handle);
	ext4_std_error(inode->i_sb, err);

	return err;
}

static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
{
	return !buffer_mapped(bh);
}

int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	loff_t size;
	unsigned long len;
	int ret;
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle;
	get_block_t *get_block;
	int retries = 0;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vma->vm_file);
	/* Delalloc case is easy... */
	if (test_opt(inode->i_sb, DELALLOC) &&
	    !ext4_should_journal_data(inode) &&
	    !ext4_nonda_switch(inode->i_sb)) {
		do {
			ret = __block_page_mkwrite(vma, vmf,
						   ext4_da_get_block_prep);
		} while (ret == -ENOSPC &&
		       ext4_should_retry_alloc(inode->i_sb, &retries));
		goto out_ret;
	}

	lock_page(page);
	size = i_size_read(inode);
	/* Page got truncated from under us? */
	if (page->mapping != mapping || page_offset(page) > size) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
	else
		len = PAGE_CACHE_SIZE;
	/*
	 * Return if we have all the buffers mapped. This avoids the need to do
	 * journal_start/journal_stop which can block and take a long time
	 */
	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
					ext4_bh_unmapped)) {
			/* Wait so that we don't change page under IO */
			wait_on_page_writeback(page);
			ret = VM_FAULT_LOCKED;
			goto out;
		}
	}
	unlock_page(page);
	/* OK, we need to fill the hole... */
	if (ext4_should_dioread_nolock(inode))
		get_block = ext4_get_block_write;
	else
		get_block = ext4_get_block;
retry_alloc:
	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}
	ret = __block_page_mkwrite(vma, vmf, get_block);
	if (!ret && ext4_should_journal_data(inode)) {
		if (walk_page_buffers(handle, page_buffers(page), 0,
			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
			unlock_page(page);
			ret = VM_FAULT_SIGBUS;
			ext4_journal_stop(handle);
			goto out;
		}
		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	}
	ext4_journal_stop(handle);
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry_alloc;
out_ret:
	ret = block_page_mkwrite_return(ret);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}