/*
 *  linux/fs/ext4/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/time.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "mballoc.h"

#include <trace/events/ext4.h>

static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
					    ext4_group_t block_group);
/*
 * balloc.c contains the blocks allocation and deallocation routines
 */

/*
 * Calculate the block group number and offset into the block/cluster
 * allocation bitmap, given a block number
 */
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	ext4_grpblk_t offset;

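	/*
	 * Note: do_div() divides in place, leaving the quotient (the
	 * group number) in blocknr and returning the remainder (the
	 * block offset within the group); shifting that remainder by
	 * s_cluster_bits converts it to a cluster offset.
	 */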
	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
		EXT4_SB(sb)->s_cluster_bits;
	if (offsetp)
		*offsetp = offset;
	if (blockgrpp)
		*blockgrpp = blocknr;
}

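/*
 * Return 1 if the given block falls inside @block_group, 0 otherwise.
 * With FLEX_BG the bitmaps and inode table of a group may live in a
 * different group, so callers check this before charging a metadata
 * block to a group.
 */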
static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
			ext4_group_t block_group)
{
	ext4_group_t actual_group;
	ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
	if (actual_group == block_group)
		return 1;
	return 0;
}

/* Return the number of clusters used for file system metadata; this
 * represents the overhead needed by the file system.
 */
unsigned ext4_num_overhead_clusters(struct super_block *sb,
				    ext4_group_t block_group,
				    struct ext4_group_desc *gdp)
{
	unsigned num_clusters;
	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
	ext4_fsblk_t itbl_blk;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* This is the number of clusters used by the superblock,
	 * block group descriptors, and reserved block group
	 * descriptor blocks */
	num_clusters = ext4_num_base_meta_clusters(sb, block_group);

	/*
	 * For the allocation bitmaps and inode table, we first need
	 * to check to see if the block is in the block group.  If it
	 * is, then check to see if the cluster is already accounted
	 * for in the clusters used for the base metadata cluster, or
	 * if we can increment the base metadata cluster to include
	 * that block.  Otherwise, we will have to track the cluster
	 * used for the allocation bitmap or inode table explicitly.
	 * Normally all of these blocks are contiguous, so the special
	 * case handling shouldn't be necessary except for *very*
	 * unusual file system layouts.
	 */
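	/*
	 * Illustrative example: without bigalloc (s_cluster_bits == 0)
	 * every block is its own cluster, so a bitmap or itable block
	 * that directly follows the base metadata just bumps
	 * num_clusters, and block_cluster/inode_cluster/itbl_cluster
	 * stay -1 unless the layout is discontiguous.
	 */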
	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
		block_cluster = EXT4_B2C(sbi,
					 ext4_block_bitmap(sb, gdp) - start);
		if (block_cluster < num_clusters)
			block_cluster = -1;
		else if (block_cluster == num_clusters) {
			num_clusters++;
			block_cluster = -1;
		}
	}

	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
		inode_cluster = EXT4_B2C(sbi,
					 ext4_inode_bitmap(sb, gdp) - start);
		if (inode_cluster < num_clusters)
			inode_cluster = -1;
		else if (inode_cluster == num_clusters) {
			num_clusters++;
			inode_cluster = -1;
		}
	}

	itbl_blk = ext4_inode_table(sb, gdp);
	for (i = 0; i < sbi->s_itb_per_group; i++) {
		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
			c = EXT4_B2C(sbi, itbl_blk + i - start);
			if ((c < num_clusters) || (c == inode_cluster) ||
			    (c == block_cluster) || (c == itbl_cluster))
				continue;
			if (c == num_clusters) {
				num_clusters++;
				continue;
			}
			num_clusters++;
			itbl_cluster = c;
		}
	}

	if (block_cluster != -1)
		num_clusters++;
	if (inode_cluster != -1)
		num_clusters++;

	return num_clusters;
}

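/*
 * Return the number of clusters in @block_group; every group except
 * possibly the last one spans exactly EXT4_BLOCKS_PER_GROUP(sb) blocks.
 */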
static unsigned int num_clusters_in_group(struct super_block *sb,
					  ext4_group_t block_group)
{
	unsigned int blocks;

	if (block_group == ext4_get_groups_count(sb) - 1) {
		/*
		 * Even though mke2fs always initializes the first and
		 * last group, just in case some other tool was used,
		 * we need to make sure we calculate the right free
		 * blocks.
		 */
		blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
			ext4_group_first_block_no(sb, block_group);
	} else
		blocks = EXT4_BLOCKS_PER_GROUP(sb);
	return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
}

/* Initializes an uninitialized block bitmap */
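/*
 * The caller must hold the buffer lock (asserted below); in this file,
 * ext4_read_block_bitmap() invokes this with both the buffer lock and
 * the group spinlock held.
 */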
void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
			    ext4_group_t block_group,
			    struct ext4_group_desc *gdp)
{
	unsigned int bit, bit_max;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t start, tmp;
	int flex_bg = 0;

	J_ASSERT_BH(bh, buffer_locked(bh));

	/* If the checksum is bad, mark all blocks used to prevent
	 * allocation, essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
		ext4_error(sb, "Checksum bad for group %u", block_group);
		ext4_free_group_clusters_set(sb, gdp, 0);
		ext4_free_inodes_set(sb, gdp, 0);
		ext4_itable_unused_set(sb, gdp, 0);
		memset(bh->b_data, 0xff, sb->s_blocksize);
		return;
	}
	memset(bh->b_data, 0, sb->s_blocksize);

	bit_max = ext4_num_base_meta_clusters(sb, block_group);
	for (bit = 0; bit < bit_max; bit++)
		ext4_set_bit(bit, bh->b_data);

	start = ext4_group_first_block_no(sb, block_group);

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
		flex_bg = 1;

	/* Set bits for block and inode bitmaps, and inode table */
	tmp = ext4_block_bitmap(sb, gdp);
	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

	tmp = ext4_inode_bitmap(sb, gdp);
	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

	tmp = ext4_inode_table(sb, gdp);
	for (; tmp < ext4_inode_table(sb, gdp) +
		     sbi->s_itb_per_group; tmp++) {
		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
	}

	/*
	 * If the number of clusters within the group is less than
	 * blocksize * 8 (the size of the bitmap in bits), mark the
	 * rest of the block bitmap as in use.
	 */
	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
			     sb->s_blocksize * 8, bh->b_data);
}

/* Return the number of free blocks in a block group.  It is used when
 * the block bitmap is uninitialized, so we can't just count the bits
 * in the bitmap. */
unsigned ext4_free_clusters_after_init(struct super_block *sb,
				       ext4_group_t block_group,
				       struct ext4_group_desc *gdp)
{
	return num_clusters_in_group(sb, block_group) -
		ext4_num_overhead_clusters(sb, block_group, gdp);
}

/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the block.  The descriptors are loaded in memory
 * when a file system is mounted (see ext4_fill_super).
 */

/**
 * ext4_get_group_desc() -- load group descriptor from disk
 * @sb:			super block
 * @block_group:	given block group
 * @bh:			pointer to the buffer head to store the block
 *			group descriptor
 */
struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
					     ext4_group_t block_group,
					     struct buffer_head **bh)
{
	unsigned int group_desc;
	unsigned int offset;
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (block_group >= ngroups) {
		ext4_error(sb, "block_group >= groups_count - block_group = %u,"
			   " groups_count = %u", block_group, ngroups);

		return NULL;
	}

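	/*
	 * Worked example (illustrative): with 4KiB blocks and the default
	 * 32-byte descriptors, EXT4_DESC_PER_BLOCK(sb) is 128, so block
	 * group 200 lives in descriptor block 200 >> 7 = 1 at index
	 * 200 & 127 = 72 within that block.
	 */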
	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
	if (!sbi->s_group_desc[group_desc]) {
		ext4_error(sb, "Group descriptor not loaded - "
			   "block_group = %u, group_desc = %u, desc = %u",
			   block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext4_group_desc *)(
		(__u8 *)sbi->s_group_desc[group_desc]->b_data +
		offset * EXT4_DESC_SIZE(sb));
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	return desc;
}

static int ext4_valid_block_bitmap(struct super_block *sb,
					struct ext4_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext4_grpblk_t offset;
	ext4_grpblk_t next_zero_bit;
	ext4_fsblk_t bitmap_blk;
	ext4_fsblk_t group_first_block;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
		/* with FLEX_BG, the inode/block bitmaps and itable
		 * blocks may not be in the group at all
		 * so the bitmap validation will be skipped for those groups
		 * or it has to also read the block group where the bitmaps
		 * are located to verify they are set.
		 */
		return 1;
	}
	group_first_block = ext4_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	bitmap_blk = ext4_block_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = ext4_inode_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode table block number is set */
	bitmap_blk = ext4_inode_table(sb, desc);
	offset = bitmap_blk - group_first_block;
	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
				offset + EXT4_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
			block_group, bitmap_blk);
	return 0;
}
/**
 * ext4_read_block_bitmap()
 * @sb:			super block
 * @block_group:	given block group
 *
 * Read the bitmap for a given block_group, and validate that the bits
 * for the block/inode bitmaps and the inode table are set in it
 *
 * Return buffer_head on success or NULL in case of failure.
 */
struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
	struct ext4_group_desc *desc;
	struct buffer_head *bh = NULL;
	ext4_fsblk_t bitmap_blk;

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;
	bitmap_blk = ext4_block_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext4_error(sb, "Cannot read block bitmap - "
			    "block_group = %u, block_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
	}

	if (bitmap_uptodate(bh))
		return bh;

	lock_buffer(bh);
	if (bitmap_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	}
	ext4_lock_group(sb, block_group);
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		ext4_init_block_bitmap(sb, bh, block_group, desc);
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		return bh;
	}
	ext4_unlock_group(sb, block_group);
	if (buffer_uptodate(bh)) {
		/*
		 * if the group is not uninit, an uptodate bh
		 * means the bitmap is uptodate as well
		 */
		set_bitmap_uptodate(bh);
		unlock_buffer(bh);
		return bh;
	}
	/*
	 * Submit the buffer_head for read.  We can safely mark the
	 * bitmap as uptodate now; we do it here so that the uptodate
	 * bit is set while the buffer lock is still held.
	 */
	trace_ext4_read_block_bitmap_load(sb, block_group);
	set_bitmap_uptodate(bh);
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
		ext4_error(sb, "Cannot read block bitmap - "
			    "block_group = %u, block_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
	}
	ext4_valid_block_bitmap(sb, desc, block_group, bh);
	/*
	 * If the file system is mounted not to panic on error,
	 * continue with the corrupt bitmap.
	 */
	return bh;
}

/**
 * ext4_has_free_clusters()
 * @sbi:	in-core super block structure.
 * @nclusters:	number of needed clusters
 * @flags:	flags from ext4_mb_new_blocks()
 *
 * Check if filesystem has nclusters free & available for allocation.
 * On success return 1, on failure return 0.
 */
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
				  s64 nclusters, unsigned int flags)
{
	s64 free_clusters, dirty_clusters, root_clusters;
	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;

	free_clusters  = percpu_counter_read_positive(fcc);
	dirty_clusters = percpu_counter_read_positive(dcc);
	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));

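	/*
	 * percpu_counter_read_positive() is fast but may be off by up to
	 * batch * nr_cpus; only when the cheap estimate falls within the
	 * watermark do we pay for an exact percpu_counter_sum_positive().
	 */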
	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
					EXT4_FREECLUSTERS_WATERMARK) {
		/* all quantities here are in cluster units */
		free_clusters  = percpu_counter_sum_positive(fcc);
		dirty_clusters = percpu_counter_sum_positive(dcc);
	}
	/* Check whether we have space after accounting for current
	 * dirty clusters & root reserved clusters.
	 */
	if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
		return 1;

	/* Hm, nope.  Are (enough) root reserved clusters available? */
	if (sbi->s_resuid == current_fsuid() ||
	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
	    capable(CAP_SYS_RESOURCE) ||
	    (flags & EXT4_MB_USE_ROOT_BLOCKS)) {

		if (free_clusters >= (nclusters + dirty_clusters))
			return 1;
	}

	return 0;
}

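/*
 * Claim @nclusters by adding them to the dirty-clusters counter when
 * enough free space is available, or fail with -ENOSPC; the counter is
 * decremented again elsewhere once the clusters are actually allocated
 * or the reservation is released.
 */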
int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
			     s64 nclusters, unsigned int flags)
{
	if (ext4_has_free_clusters(sbi, nclusters, flags)) {
		percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
		return 0;
	} else
		return -ENOSPC;
}

/**
 * ext4_should_retry_alloc()
 * @sb:			super block
 * @retries:		number of attempts that have been made
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
 * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * If the total number of retries exceeds three, return FALSE.
 */
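/*
 * Typical caller pattern (illustrative; do_allocation() stands in for
 * any block-allocating operation):
 *
 *	int retries = 0;
 * retry:
 *	err = do_allocation(...);
 *	if (err == -ENOSPC && ext4_should_retry_alloc(sb, &retries))
 *		goto retry;
 */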
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
	if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
	    (*retries)++ > 3 ||
	    !EXT4_SB(sb)->s_journal)
		return 0;

	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

	return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}

/*
 * ext4_new_meta_blocks() -- allocate blocks for metadata (indexing)
 *
 * @handle:             handle to this transaction
 * @inode:              file inode
 * @goal:               given target block (filesystem wide)
 * @flags:              allocation flags passed on to ext4_mb_new_blocks()
 * @count:		pointer to total number of clusters needed
 * @errp:               error code
 *
 * Return the first allocated block number on success; *count stores the
 * total number of blocks allocated, and the error code is stored in *errp.
 */
ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
				  ext4_fsblk_t goal, unsigned int flags,
				  unsigned long *count, int *errp)
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	memset(&ar, 0, sizeof(ar));
	/* Fill with neighbour allocated blocks */
	ar.inode = inode;
	ar.goal = goal;
	ar.len = count ? *count : 1;
	ar.flags = flags;

	ret = ext4_mb_new_blocks(handle, &ar, errp);
	if (count)
		*count = ar.len;
	/*
	 * Account for the allocated meta blocks.  We will never
	 * fail EDQUOT for metadata, but we do account for it.
	 */
	if (!(*errp) &&
	    ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
		dquot_alloc_block_nofail(inode,
				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
	}
	return ret;
}

/**
 * ext4_count_free_clusters() -- count filesystem free clusters
 * @sb:		superblock
 *
 * Adds up the number of free clusters from each block group.
 */
ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
536
{
537
538
	ext4_fsblk_t desc_count;
	struct ext4_group_desc *gdp;
539
	ext4_group_t i;
540
	ext4_group_t ngroups = ext4_get_groups_count(sb);
541
542
543
#ifdef EXT4FS_DEBUG
	struct ext4_super_block *es;
	ext4_fsblk_t bitmap_count;
544
	unsigned int x;
545
546
	struct buffer_head *bitmap_bh = NULL;

547
	es = EXT4_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;

	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += ext4_free_group_clusters(sb, gdp);
		brelse(bitmap_bh);
		bitmap_bh = ext4_read_block_bitmap(sb, i);
		if (bitmap_bh == NULL)
			continue;

		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
			i, ext4_free_group_clusters(sb, gdp), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
	       ", computed = %llu, %llu\n",
	       EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
	       desc_count, bitmap_count);
	return bitmap_count;
#else
	desc_count = 0;
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += ext4_free_group_clusters(sb, gdp);
	}

	return desc_count;
#endif
}

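/*
 * test_root() returns 1 when @a is an integer power of @b.  It backs
 * ext4_group_sparse() below: with the sparse_super feature, superblock
 * and group descriptor backups are placed only in groups 0, 1 and
 * powers of 3, 5 and 7.
 */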
static inline int test_root(ext4_group_t a, int b)
{
	int num = b;

	while (a > num)
		num *= b;
	return num == a;
}

static int ext4_group_sparse(ext4_group_t group)
{
	if (group <= 1)
		return 1;
	if (!(group & 1))
		return 0;
	return (test_root(group, 7) || test_root(group, 5) ||
		test_root(group, 3));
}

/**
 *	ext4_bg_has_super - number of blocks used by the superblock in group
 *	@sb: superblock for filesystem
 *	@group: group number to check
 *
 *	Return the number of blocks used by the superblock (primary or backup)
 *	in this group.  Currently this will be only 0 or 1.
 */
int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
{
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
			!ext4_group_sparse(group))
		return 0;
	return 1;
}

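/*
 * With META_BG, each run of EXT4_DESC_PER_BLOCK(sb) groups forms a
 * metagroup whose single descriptor block is stored in the metagroup's
 * first group, with backups in the second and the last group.
 */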
static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
	ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
	ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;

	if (group == first || group == first + 1 || group == last)
		return 1;
	return 0;
}

static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
					ext4_group_t group)
{
	if (!ext4_bg_has_super(sb, group))
		return 0;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG))
		return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
	else
		return EXT4_SB(sb)->s_gdb_count;
}

/**
 *	ext4_bg_num_gdb - number of blocks used by the group table in group
 *	@sb: superblock for filesystem
 *	@group: group number to check
 *
 *	Return the number of blocks used by the group descriptor table
 *	(primary or backup) in this group.  In the future there may be a
 *	different number of descriptor blocks in each group.
 */
unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
{
	unsigned long first_meta_bg =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
			metagroup < first_meta_bg)
		return ext4_bg_num_gdb_nometa(sb, group);

	return ext4_bg_num_gdb_meta(sb, group);
}

/*
 * This function returns the number of file system metadata clusters at
 * the beginning of a block group, including the reserved gdt blocks.
 */
static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
				     ext4_group_t block_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned num;

	/* Check for superblock and gdt backups in this group */
	num = ext4_bg_has_super(sb, block_group);

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
			  sbi->s_desc_per_block) {
		if (num) {
			num += ext4_bg_num_gdb(sb, block_group);
			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
		}
	} else { /* For META_BG_BLOCK_GROUPS */
		num += ext4_bg_num_gdb(sb, block_group);
	}
	return EXT4_NUM_B2C(sbi, num);
}
/**
 *	ext4_inode_to_goal_block - return a hint for block allocation
 *	@inode: inode for block allocation
 *
 *	Return the ideal location to start allocating blocks for a
 *	newly created inode.
 */
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_group_t block_group;
	ext4_grpblk_t colour;
	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
	ext4_fsblk_t bg_start;
	ext4_fsblk_t last_block;

	block_group = ei->i_block_group;
	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
		/*
		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
		 * block groups per flexgroup, reserve the first block
		 * group for directories and special files.  Regular
		 * files will start at the second block group.  This
		 * tends to speed up directory access and improves
		 * fsck times.
		 */
		block_group &= ~(flex_size-1);
		if (S_ISREG(inode->i_mode))
			block_group++;
	}
	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

	/*
	 * If we are doing delayed allocation, we don't need to take
	 * colour into account.
	 */
	if (test_opt(inode->i_sb, DELALLOC))
		return bg_start;

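	/*
	 * Otherwise, spread allocations from different processes across
	 * the group by adding a pid-derived "colour" offset to the
	 * starting block.
	 */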
	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
		colour = (current->pid % 16) *
			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
	else
		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
	return bg_start + colour;
}