extents.c 69.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public Licens
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/ext4_jbd2.h>
36
#include <linux/jbd2.h>
37
38
39
40
41
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
Amit Arora's avatar
Amit Arora committed
42
#include <linux/falloc.h>
43
44
45
46
#include <linux/ext4_fs_extents.h>
#include <asm/uaccess.h>


47
48
49
50
/*
 * ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 */
51
static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
52
53
54
{
	ext4_fsblk_t block;

55
	block = le32_to_cpu(ex->ee_start_lo);
56
	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
57
58
59
	return block;
}

60
61
62
63
/*
 * idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 */
64
ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
65
66
67
{
	ext4_fsblk_t block;

68
	block = le32_to_cpu(ix->ei_leaf_lo);
69
	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
70
71
72
	return block;
}

73
74
75
76
77
/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 */
78
void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
79
{
80
	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
81
	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
82
83
}

84
85
86
87
88
/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 */
89
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
90
{
91
	ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
92
	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
93
94
}

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
{
	int err;

	if (handle->h_buffer_credits > needed)
		return handle;
	if (!ext4_journal_extend(handle, needed))
		return handle;
	err = ext4_journal_restart(handle, needed);

	return handle;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	if (path->p_bh) {
		/* path points to block */
		return ext4_journal_get_write_access(handle, path->p_bh);
	}
	/* path points to leaf/index in inode body */
	/* we use in-core data, no need to protect them */
	return 0;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	int err;
	if (path->p_bh) {
		/* path points to block */
		err = ext4_journal_dirty_metadata(handle, path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
	}
	return err;
}

145
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
146
			      struct ext4_ext_path *path,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
147
			      ext4_lblk_t block)
148
149
{
	struct ext4_inode_info *ei = EXT4_I(inode);
150
	ext4_fsblk_t bg_start;
151
	ext4_fsblk_t last_block;
152
	ext4_grpblk_t colour;
153
154
155
156
157
158
159
	int depth;

	if (path) {
		struct ext4_extent *ex;
		depth = path->p_depth;

		/* try to predict block placement */
160
161
		ex = path[depth].p_ext;
		if (ex)
162
			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
163

164
165
		/* it looks like index is empty;
		 * try to find starting block from index itself */
166
167
168
169
170
171
172
		if (path[depth].p_bh)
			return path[depth].p_bh->b_blocknr;
	}

	/* OK. use inode's group */
	bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
		le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
173
174
175
176
	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
		colour = (current->pid % 16) *
177
			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
178
179
	else
		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
180
181
182
	return bg_start + colour + block;
}

183
static ext4_fsblk_t
184
185
186
187
ext4_ext_new_block(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path,
			struct ext4_extent *ex, int *err)
{
188
	ext4_fsblk_t goal, newblock;
189
190
191
192
193
194

	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
	newblock = ext4_new_block(handle, inode, goal, err);
	return newblock;
}

195
static int ext4_ext_space_block(struct inode *inode)
196
197
198
199
200
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent);
201
#ifdef AGGRESSIVE_TEST
202
203
204
205
206
207
	if (size > 6)
		size = 6;
#endif
	return size;
}

208
static int ext4_ext_space_block_idx(struct inode *inode)
209
210
211
212
213
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent_idx);
214
#ifdef AGGRESSIVE_TEST
215
216
217
218
219
220
	if (size > 5)
		size = 5;
#endif
	return size;
}

221
static int ext4_ext_space_root(struct inode *inode)
222
223
224
225
226
227
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);
228
#ifdef AGGRESSIVE_TEST
229
230
231
232
233
234
	if (size > 3)
		size = 3;
#endif
	return size;
}

235
static int ext4_ext_space_root_idx(struct inode *inode)
236
237
238
239
240
241
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent_idx);
242
#ifdef AGGRESSIVE_TEST
243
244
245
246
247
248
	if (size > 4)
		size = 4;
#endif
	return size;
}

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int max;

	if (depth == ext_depth(inode)) {
		if (depth == 0)
			max = ext4_ext_space_root(inode);
		else
			max = ext4_ext_space_root_idx(inode);
	} else {
		if (depth == 0)
			max = ext4_ext_space_block(inode);
		else
			max = ext4_ext_space_block_idx(inode);
	}

	return max;
}

static int __ext4_ext_check_header(const char *function, struct inode *inode,
					struct ext4_extent_header *eh,
					int depth)
{
	const char *error_msg;
	int max = 0;

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error(inode->i_sb, function,
			"bad header in inode #%lu: %s - magic %x, "
			"entries %u, max %u(%u), depth %u(%u)",
			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
			max, le16_to_cpu(eh->eh_depth), depth);

	return -EIO;
}

#define ext4_ext_check_header(inode, eh, depth)	\
	__ext4_ext_check_header(__FUNCTION__, inode, eh, depth)

313
314
315
316
317
318
319
320
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug("path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
321
		  ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
322
			    idx_pblock(path->p_idx));
323
		} else if (path->p_ext) {
324
			ext_debug("  %d:%d:%llu ",
325
				  le32_to_cpu(path->p_ext->ee_block),
Amit Arora's avatar
Amit Arora committed
326
				  ext4_ext_get_actual_len(path->p_ext),
327
				  ext_pblock(path->p_ext));
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
		} else
			ext_debug("  []");
	}
	ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
348
		ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
Amit Arora's avatar
Amit Arora committed
349
			  ext4_ext_get_actual_len(ex), ext_pblock(ex));
350
351
352
353
354
355
356
357
	}
	ext_debug("\n");
}
#else
#define ext4_ext_show_path(inode,path)
#define ext4_ext_show_leaf(inode,path)
#endif

358
void ext4_ext_drop_refs(struct ext4_ext_path *path)
359
360
361
362
363
364
365
366
367
368
369
370
{
	int depth = path->p_depth;
	int i;

	for (i = 0; i <= depth; i++, path++)
		if (path->p_bh) {
			brelse(path->p_bh);
			path->p_bh = NULL;
		}
}

/*
371
372
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
373
 * the header must be checked before calling this
374
375
 */
static void
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
376
377
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
378
379
380
381
382
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;


383
	ext_debug("binsearch for %u(idx):  ", block);
384
385

	l = EXT_FIRST_INDEX(eh) + 1;
Dmitry Monakhov's avatar
Dmitry Monakhov committed
386
	r = EXT_LAST_INDEX(eh);
387
388
389
390
391
392
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
393
394
395
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
				m, le32_to_cpu(m->ei_block),
				r, le32_to_cpu(r->ei_block));
396
397
398
	}

	path->p_idx = l - 1;
399
	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
400
		  idx_pblock(path->p_idx));
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
		  if (k != 0 &&
		      le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
				printk("k=%d, ix=0x%p, first=0x%p\n", k,
					ix, EXT_FIRST_INDEX(eh));
				printk("%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
Dave Kleikamp's avatar
Dave Kleikamp committed
418
					   <= le32_to_cpu(ix[-1].ei_block));
419
420
421
422
423
424
425
426
427
428
429
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif

}

/*
430
431
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
432
 * the header must be checked before calling this
433
434
 */
static void
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
435
436
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
437
438
439
440
441
442
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
443
444
		 * this leaf is empty:
		 * we get such a leaf in split/add case
445
446
447
448
		 */
		return;
	}

449
	ext_debug("binsearch for %u:  ", block);
450
451

	l = EXT_FIRST_EXTENT(eh) + 1;
Dmitry Monakhov's avatar
Dmitry Monakhov committed
452
	r = EXT_LAST_EXTENT(eh);
453
454
455
456
457
458
459

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
460
461
462
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
				m, le32_to_cpu(m->ee_block),
				r, le32_to_cpu(r->ee_block));
463
464
465
	}

	path->p_ext = l - 1;
466
	ext_debug("  -> %d:%llu:%d ",
Dave Kleikamp's avatar
Dave Kleikamp committed
467
468
			le32_to_cpu(path->p_ext->ee_block),
			ext_pblock(path->p_ext),
Amit Arora's avatar
Amit Arora committed
469
			ext4_ext_get_actual_len(path->p_ext));
470
471
472
473
474
475
476
477
478

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
Dave Kleikamp's avatar
Dave Kleikamp committed
479
					  <= le32_to_cpu(ex[-1].ee_block));
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif

}

int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = EXT4_EXT_MAGIC;
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode));
	ext4_mark_inode_dirty(handle, inode);
	ext4_ext_invalidate_cache(inode);
	return 0;
}

struct ext4_ext_path *
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
505
506
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
					struct ext4_ext_path *path)
507
508
509
510
511
512
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	short int depth, i, ppos = 0, alloc = 0;

	eh = ext_inode_hdr(inode);
513
514
	depth = ext_depth(inode);
	if (ext4_ext_check_header(inode, eh, depth))
515
516
517
518
519
		return ERR_PTR(-EIO);


	/* account possible depth increase */
	if (!path) {
520
		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
521
522
523
524
525
526
527
				GFP_NOFS);
		if (!path)
			return ERR_PTR(-ENOMEM);
		alloc = 1;
	}
	path[0].p_hdr = eh;

528
	i = depth;
529
530
531
532
	/* walk through the tree */
	while (i) {
		ext_debug("depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
533

534
		ext4_ext_binsearch_idx(inode, path + ppos, block);
535
		path[ppos].p_block = idx_pblock(path[ppos].p_idx);
536
537
538
539
540
541
542
543
544
545
546
547
548
549
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = sb_bread(inode->i_sb, path[ppos].p_block);
		if (!bh)
			goto err;

		eh = ext_block_hdr(bh);
		ppos++;
		BUG_ON(ppos > depth);
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
		i--;

550
		if (ext4_ext_check_header(inode, eh, i))
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
			goto err;
	}

	path[ppos].p_depth = i;
	path[ppos].p_hdr = eh;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);
	if (alloc)
		kfree(path);
	return ERR_PTR(-EIO);
}

/*
574
575
576
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
577
578
579
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *curp,
580
				int logical, ext4_fsblk_t ptr)
581
582
583
584
{
	struct ext4_extent_idx *ix;
	int len, err;

585
586
	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
587
588
589
590
591
592
593
594
595
		return err;

	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
			len = (len - 1) * sizeof(struct ext4_extent_idx);
			len = len < 0 ? 0 : len;
596
			ext_debug("insert new index %d after: %llu. "
597
598
599
600
601
602
603
604
605
606
					"move %d from 0x%p to 0x%p\n",
					logical, ptr, len,
					(curp->p_idx + 1), (curp->p_idx + 2));
			memmove(curp->p_idx + 2, curp->p_idx + 1, len);
		}
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		len = len * sizeof(struct ext4_extent_idx);
		len = len < 0 ? 0 : len;
607
		ext_debug("insert new index %d before: %llu. "
608
609
610
611
612
613
614
615
				"move %d from 0x%p to 0x%p\n",
				logical, ptr, len,
				curp->p_idx, (curp->p_idx + 1));
		memmove(curp->p_idx + 1, curp->p_idx, len);
		ix = curp->p_idx;
	}

	ix->ei_block = cpu_to_le32(logical);
616
	ext4_idx_store_pblock(ix, ptr);
617
618
619
	curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);

	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
Dave Kleikamp's avatar
Dave Kleikamp committed
620
			     > le16_to_cpu(curp->p_hdr->eh_max));
621
622
623
624
625
626
627
628
629
	BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}

/*
630
631
632
633
634
635
636
637
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
638
639
640
641
642
643
644
645
646
647
648
 */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_extent *newext, int at)
{
	struct buffer_head *bh = NULL;
	int depth = ext_depth(inode);
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	struct ext4_extent *ex;
	int i = at, k, m, a;
649
	ext4_fsblk_t newblock, oldblock;
650
	__le32 border;
651
	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
652
653
654
	int err = 0;

	/* make decision: where to split? */
655
	/* FIXME: now decision is simplest: at current extent */
656

657
	/* if current leaf will be split, then we should use
658
659
660
661
	 * border from split point */
	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
662
		ext_debug("leaf will be split."
663
				" next leaf starts at %d\n",
Dave Kleikamp's avatar
Dave Kleikamp committed
664
				  le32_to_cpu(border));
665
666
667
668
	} else {
		border = newext->ee_block;
		ext_debug("leaf will be added."
				" next leaf starts at %d\n",
Dave Kleikamp's avatar
Dave Kleikamp committed
669
				le32_to_cpu(border));
670
671
672
	}

	/*
673
674
	 * If error occurs, then we break processing
	 * and mark filesystem read-only. index won't
675
	 * be inserted and tree will be in consistent
676
	 * state. Next mount will repair buffers too.
677
678
679
	 */

	/*
680
681
682
	 * Get array to track all allocated blocks.
	 * We need this to handle errors and free blocks
	 * upon them.
683
	 */
684
	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	BUG_ON(newblock == 0);
	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		goto cleanup;
	}
	lock_buffer(bh);

707
708
	err = ext4_journal_get_create_access(handle, bh);
	if (err)
709
710
711
712
713
714
715
716
717
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;
	ex = EXT_FIRST_EXTENT(neh);

718
	/* move remainder of path[depth] to the new leaf */
719
720
721
722
723
724
725
	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
	/* start copy from next extent */
	/* TODO: we could do it by single memmove */
	m = 0;
	path[depth].p_ext++;
	while (path[depth].p_ext <=
			EXT_MAX_EXTENT(path[depth].p_hdr)) {
726
		ext_debug("move %d:%llu:%d in new leaf %llu\n",
Dave Kleikamp's avatar
Dave Kleikamp committed
727
728
				le32_to_cpu(path[depth].p_ext->ee_block),
				ext_pblock(path[depth].p_ext),
Amit Arora's avatar
Amit Arora committed
729
				ext4_ext_get_actual_len(path[depth].p_ext),
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
				newblock);
		/*memmove(ex++, path[depth].p_ext++,
				sizeof(struct ext4_extent));
		neh->eh_entries++;*/
		path[depth].p_ext++;
		m++;
	}
	if (m) {
		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
		neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
	}

	set_buffer_uptodate(bh);
	unlock_buffer(bh);

745
746
	err = ext4_journal_dirty_metadata(handle, bh);
	if (err)
747
748
749
750
751
752
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	if (m) {
753
754
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
755
756
757
			goto cleanup;
		path[depth].p_hdr->eh_entries =
		     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758
759
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
			goto cleanup;

	}

	/* create intermediate indexes */
	k = depth - at - 1;
	BUG_ON(k < 0);
	if (k)
		ext_debug("create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
775
		bh = sb_getblk(inode->i_sb, newblock);
776
777
778
779
780
781
		if (!bh) {
			err = -EIO;
			goto cleanup;
		}
		lock_buffer(bh);

782
783
		err = ext4_journal_get_create_access(handle, bh);
		if (err)
784
785
786
787
788
789
790
791
792
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
		neh->eh_depth = cpu_to_le16(depth - i);
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
793
		ext4_idx_store_pblock(fidx, oldblock);
794

795
796
		ext_debug("int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);
797
798
799
800
801
802
803
804
805
		/* copy indexes */
		m = 0;
		path[i].p_idx++;

		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
				EXT_LAST_INDEX(path[i].p_hdr));
		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
806
			ext_debug("%d: move %d:%llu in new index %llu\n", i,
Dave Kleikamp's avatar
Dave Kleikamp committed
807
808
809
					le32_to_cpu(path[i].p_idx->ei_block),
					idx_pblock(path[i].p_idx),
					newblock);
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
			/*memmove(++fidx, path[i].p_idx++,
					sizeof(struct ext4_extent_idx));
			neh->eh_entries++;
			BUG_ON(neh->eh_entries > neh->eh_max);*/
			path[i].p_idx++;
			m++;
		}
		if (m) {
			memmove(++fidx, path[i].p_idx - m,
				sizeof(struct ext4_extent_idx) * m);
			neh->eh_entries =
				cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

826
827
		err = ext4_journal_dirty_metadata(handle, bh);
		if (err)
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
862
			ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
863
864
865
866
867
868
869
870
		}
	}
	kfree(ablocks);

	return err;
}

/*
871
872
873
874
875
876
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
877
878
879
880
881
882
883
884
885
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
					struct ext4_ext_path *path,
					struct ext4_extent *newext)
{
	struct ext4_ext_path *curp = path;
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	struct buffer_head *bh;
886
	ext4_fsblk_t newblock;
887
888
889
890
891
892
893
894
895
896
897
898
899
900
	int err = 0;

	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
	if (newblock == 0)
		return err;

	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		ext4_std_error(inode->i_sb, err);
		return err;
	}
	lock_buffer(bh);

901
902
	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
		unlock_buffer(bh);
		goto out;
	}

	/* move top-level index/leaf into new block */
	memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* old root could have indexes or leaves
	 * so calculate e_max right way */
	if (ext_depth(inode))
	  neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
	else
	  neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
	neh->eh_magic = EXT4_EXT_MAGIC;
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

922
923
	err = ext4_journal_dirty_metadata(handle, bh);
	if (err)
924
925
926
		goto out;

	/* create index in new top-level index: num,max,pointer */
927
928
	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
929
930
931
932
933
934
		goto out;

	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
	curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
	curp->p_hdr->eh_entries = cpu_to_le16(1);
	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
Dmitry Monakhov's avatar
Dmitry Monakhov committed
935
936
937
938
939
940
941

	if (path[0].p_hdr->eh_depth)
		curp->p_idx->ei_block =
			EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
	else
		curp->p_idx->ei_block =
			EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
942
	ext4_idx_store_pblock(curp->p_idx, newblock);
943
944
945

	neh = ext_inode_hdr(inode);
	fidx = EXT_FIRST_INDEX(neh);
946
	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
947
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
948
		  le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
949
950
951
952
953
954
955
956
957
958

	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
	err = ext4_ext_dirty(handle, inode, curp);
out:
	brelse(bh);

	return err;
}

/*
959
960
961
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
					struct ext4_ext_path *path,
					struct ext4_extent *newext)
{
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up to the tree and look for free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

980
981
	/* we use already allocated block for index block,
	 * so subsequent data blocks should be contiguous */
982
983
984
985
986
987
988
989
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found index with free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, path, newext, i);

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
990
991
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
992
993
994
995
996
997
998
999
1000
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, path, newext);
		if (err)
			goto out;

		/* refill path */
For faster browsing, not all history is shown. View entire blame