dcache.c 89 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
21
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
22
23
24
25
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
26
#include <linux/export.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
28
29
30
31
32
33
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
34
#include <linux/fs_struct.h>
35
#include <linux/hardirq.h>
36
37
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
38
#include <linux/prefetch.h>
39
#include <linux/ratelimit.h>
40
#include <linux/list_lru.h>
41
42
#include <linux/kasan.h>

43
#include "internal.h"
44
#include "mount.h"
Linus Torvalds's avatar
Linus Torvalds committed
45

Nick Piggin's avatar
Nick Piggin committed
46
47
/*
 * Usage:
48
 * dcache->d_inode->i_lock protects:
49
 *   - i_dentry, d_u.d_alias, d_inode of aliases
50
51
52
53
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
54
 * dentry->d_sb->s_dentry_lru_lock protects:
Nick Piggin's avatar
Nick Piggin committed
55
56
57
58
59
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
Nick Piggin's avatar
Nick Piggin committed
60
 *   - d_count
Nick Piggin's avatar
Nick Piggin committed
61
 *   - d_unhashed()
Nick Piggin's avatar
Nick Piggin committed
62
63
 *   - d_parent and d_subdirs
 *   - childrens' d_child and d_parent
64
 *   - d_u.d_alias, d_inode
Nick Piggin's avatar
Nick Piggin committed
65
66
 *
 * Ordering:
67
 * dentry->d_inode->i_lock
Nick Piggin's avatar
Nick Piggin committed
68
 *   dentry->d_lock
69
 *     dentry->d_sb->s_dentry_lru_lock
70
71
 *     dcache_hash_bucket lock
 *     s_anon lock
Nick Piggin's avatar
Nick Piggin committed
72
 *
Nick Piggin's avatar
Nick Piggin committed
73
74
75
76
77
78
79
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
Nick Piggin's avatar
Nick Piggin committed
80
81
82
83
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
84
int sysctl_vfs_cache_pressure __read_mostly = 100;
Linus Torvalds's avatar
Linus Torvalds committed
85
86
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

Al Viro's avatar
Al Viro committed
87
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
Linus Torvalds's avatar
Linus Torvalds committed
88

89
EXPORT_SYMBOL(rename_lock);
Linus Torvalds's avatar
Linus Torvalds committed
90

91
static struct kmem_cache *dentry_cache __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
92
93
94
95
96
97
98
99
100
101

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */

102
103
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
104

105
static struct hlist_bl_head *dentry_hashtable __read_mostly;
106

107
static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108
					unsigned int hash)
109
{
110
	hash += (unsigned long) parent / L1_CACHE_BYTES;
111
	return dentry_hashtable + hash_32(hash, d_hash_shift);
112
113
}

Linus Torvalds's avatar
Linus Torvalds committed
114
115
116
117
118
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,	/* only non-zero default; the counters below are filled in on demand */
};

119
static DEFINE_PER_CPU(long, nr_dentry);
120
static DEFINE_PER_CPU(long, nr_dentry_unused);
121
122

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
123
124
125
126
127
128
129
130
131
132
133
134
135

/*
 * Here we resort to our own counters instead of using generic per-cpu counters
 * for consistency with what the vfs inode code does. We are expected to harvest
 * better code and performance by having our own specialized counters.
 *
 * Please note that the loop is done over all possible CPUs, not over all online
 * CPUs. The reason for this is that we don't want to play games with CPUs going
 * on and off. If one of them goes off, we will just keep their counters.
 *
 * glommer: See cffbc8a for details, and if you ever intend to change this,
 * please update all vfs counters to match.
 */
136
/* Sum the per-cpu dentry counters; clamp a transiently negative total to 0. */
static long get_nr_dentry(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	/* Unsynchronized per-cpu reads can momentarily sum below zero. */
	return sum < 0 ? 0 : sum;
}

145
146
147
148
149
150
151
152
153
static long get_nr_dentry_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry_unused, i);
	return sum < 0 ? 0 : sum;
}

154
/*
 * sysctl handler for the dentry-stat table: refresh the counters from the
 * per-cpu totals, then let the generic ulong-vector handler do the copy-out.
 */
int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	dentry_stat.nr_unused = get_nr_dentry_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

163
164
165
166
/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
167
168
169
170
171
172
173
174
175
176
177
178
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
179
/*
 * Word-at-a-time name comparison: returns 0 on match, non-zero otherwise.
 * 'cs'/'tcount' come from a dentry (suitably aligned); 'ct' may come
 * from a pathname and is read with load_unaligned_zeropad() so reading
 * past its end cannot fault.  Caller guarantees tcount is non-zero.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a,b,mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		/* Fewer than a word left: the masked tail compare below handles it */
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	/* Compare only the remaining tcount bytes of the last word */
	mask = bytemask_from_count(tcount);
	return unlikely(!!((a ^ b) & mask));
}

200
#else
201

202
/*
 * Byte-at-a-time fallback comparison for architectures without
 * word-at-a-time access.  Returns 0 if the first tcount bytes match,
 * 1 on the first mismatch.  Caller guarantees tcount is non-zero.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned i;

	for (i = 0; i < tcount; i++) {
		if (cs[i] != ct[i])
			return 1;
	}
	return 0;
}

214
215
#endif

216
217
/*
 * Compare a dentry's name against a caller-supplied (name, length) pair.
 * Returns 0 on match, non-zero otherwise.  Safe under RCU-walk.
 */
static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	/* Order the name-pointer load before dereferencing it (Alpha) */
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

240
241
242
243
244
245
246
247
248
249
250
251
252
/*
 * Header for names too long for the dentry's inline d_iname buffer.
 * While the name is live, u.count is the share count (names can be
 * shared across dentries on rename); once the last reference drops,
 * u.head is reused for the RCU-deferred free — hence the union.
 */
struct external_name {
	union {
		atomic_t count;		/* references while the name is in use */
		struct rcu_head head;	/* reused only during RCU free */
	} u;
	unsigned char name[];		/* NUL-terminated name bytes */
};

/* Recover the external_name header from a dentry's d_name.name pointer. */
static inline struct external_name *external_name(struct dentry *dentry)
{
	return container_of(dentry->d_name.name, struct external_name, name[0]);
}

Christoph Hellwig's avatar
Christoph Hellwig committed
253
/* RCU callback: free a dentry whose name lives in the inline d_iname. */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	kmem_cache_free(dentry_cache, dentry);
}

/* RCU callback: free a dentry together with its externally-allocated name. */
static void __d_free_external(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
	kfree(external_name(dentry));
	kmem_cache_free(dentry_cache, dentry);
}

267
268
269
270
271
/* True if the dentry's name is in a separate allocation (see external_name). */
static inline int dname_external(const struct dentry *dentry)
{
	return dentry->d_name.name != dentry->d_iname;
}

272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*
 * Attach an inode to a dentry and publish its entry-type flags with a
 * single WRITE_ONCE, so lockless readers never see a torn flag word.
 */
static inline void __d_set_inode_and_type(struct dentry *dentry,
					  struct inode *inode,
					  unsigned type_flags)
{
	unsigned flags;

	dentry->d_inode = inode;
	flags = READ_ONCE(dentry->d_flags);
	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
	flags |= type_flags;
	WRITE_ONCE(dentry->d_flags, flags);
}

/*
 * Detach the inode: clear the type bits (making the dentry read as
 * negative) before NULLing d_inode, again via READ_ONCE/WRITE_ONCE
 * for the benefit of lockless readers.
 */
static inline void __d_clear_type_and_inode(struct dentry *dentry)
{
	unsigned flags = READ_ONCE(dentry->d_flags);

	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
	WRITE_ONCE(dentry->d_flags, flags);
	dentry->d_inode = NULL;
}

Al Viro's avatar
Al Viro committed
294
295
/*
 * Final disposal of a dead dentry.  The dentry must already be
 * detached from its inode (alias list unhashed).  An external name is
 * shared-counted: only the last user frees it, and it must be freed in
 * the same RCU grace period as the dentry that carries it.
 */
static void dentry_free(struct dentry *dentry)
{
	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
	if (unlikely(dname_external(dentry))) {
		struct external_name *p = external_name(dentry);
		if (likely(atomic_dec_and_test(&p->u.count))) {
			/* last user of the name: free name + dentry after RCU */
			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
			return;
		}
		/* name still shared: fall through and free only the dentry */
	}
	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

Nick Piggin's avatar
Nick Piggin committed
311
/**
 * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
{
	lockdep_assert_held(&dentry->d_lock);
	/* Go through an invalidation barrier */
	write_seqcount_invalidate(&dentry->d_seq);
}

Linus Torvalds's avatar
Linus Torvalds committed
325
326
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 *
 * Called with both d_lock and the inode's i_lock held (when there is
 * an inode); both are dropped before calling into d_iput()/iput(),
 * which may sleep.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		__d_clear_type_and_inode(dentry);
		hlist_del_init(&dentry->d_u.d_alias);
		/* drop locks before potentially-sleeping fsnotify/iput work */
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		/* negative dentry: nothing to release, just drop the lock */
		spin_unlock(&dentry->d_lock);
	}
}

Nick Piggin's avatar
Nick Piggin committed
351
352
353
354
355
356
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 *
 * Unlike dentry_iput(), the dentry stays alive (it becomes negative),
 * so the d_inode/type change is bracketed by the d_seq seqcount to
 * invalidate concurrent rcu-walk lookups.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;

	raw_write_seqcount_begin(&dentry->d_seq);
	__d_clear_type_and_inode(dentry);
	hlist_del_init(&dentry->d_u.d_alias);
	raw_write_seqcount_end(&dentry->d_seq);
	/* drop locks before potentially-sleeping fsnotify/iput work */
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
/*
 * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
 * is in use - which includes both the "real" per-superblock
 * LRU list _and_ the DCACHE_SHRINK_LIST use.
 *
 * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
 * on the shrink list (ie not on the superblock LRU list).
 *
 * The per-cpu "nr_dentry_unused" counters are updated with
 * the DCACHE_LRU_LIST bit.
 *
 * These helper functions make sure we always follow the
 * rules. d_lock must be held by the caller.
 */
#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))

/* Put an unreferenced dentry on its superblock's LRU list. */
static void d_lru_add(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, 0);
	dentry->d_flags |= DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Remove a dentry from its superblock's LRU list. */
static void d_lru_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Take a dentry off a private shrink list (not the LRU proper). */
static void d_shrink_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	this_cpu_dec(nr_dentry_unused);
}

/* Put a dentry (currently on neither list) onto a private shrink list. */
static void d_shrink_add(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, 0);
	list_add(&dentry->d_lru, list);
	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
}

/*
 * These can only be called under the global LRU lock, ie during the
 * callback for freeing the LRU list. "isolate" removes it from the
 * LRU lists entirely, while shrink_move moves it to the indicated
 * private list.
 */
static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	/* list_lru_isolate, not d_lru_del: the LRU lock is already held */
	list_lru_isolate(lru, &dentry->d_lru);
}

static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
			      struct list_head *list)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	/* keeps DCACHE_LRU_LIST set: the entry is still "in use" on the shrink list */
	dentry->d_flags |= DCACHE_SHRINK_LIST;
	list_lru_isolate_move(lru, &dentry->d_lru, list);
}

444
/*
 * dentry_lru_(add|del)_list) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	/* no-op if already on the LRU (or a shrink list) */
	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
		d_lru_add(dentry);
}

Nick Piggin's avatar
Nick Piggin committed
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		/*
		 * Hashed dentries are normally on the dentry hashtable,
		 * with the exception of those newly allocated by
		 * d_obtain_alias, which are always IS_ROOT:
		 */
		if (unlikely(IS_ROOT(dentry)))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		/* NULL pprev is what d_unhashed() tests for */
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
		/* fail any rcu-walk lookups still in flight on this dentry */
		dentry_rcuwalk_invalidate(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

/* Locked wrapper around __d_drop(); see its kernel-doc for semantics. */
void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

Al Viro's avatar
Al Viro committed
499
/*
 * Tear down a dentry that has been committed to death.  Called with
 * dentry->d_lock, the parent's d_lock (if any) and the inode's i_lock
 * (if any) held; all are dropped along the way (via dentry_iput).
 * Freeing is deferred to the shrink-list owner if the dentry is
 * currently on a shrink list.
 */
static void __dentry_kill(struct dentry *dentry)
{
	struct dentry *parent = NULL;
	bool can_free = true;
	if (!IS_ROOT(dentry))
		parent = dentry->d_parent;

	/*
	 * The dentry is now unrecoverably dead to the world.
	 */
	lockref_mark_dead(&dentry->d_lockref);

	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if (dentry->d_flags & DCACHE_OP_PRUNE)
		dentry->d_op->d_prune(dentry);

	if (dentry->d_flags & DCACHE_LRU_LIST) {
		/* a shrink-list entry is the shrink walker's to unlink */
		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
			d_lru_del(dentry);
	}
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	__list_del_entry(&dentry->d_child);
	/*
	 * Inform d_walk() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	BUG_ON(dentry->d_lockref.count > 0);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* re-take d_lock to coordinate freeing with shrink_dentry_list() */
	spin_lock(&dentry->d_lock);
	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
		/* shrink-list walker frees it when it sees DCACHE_MAY_FREE */
		dentry->d_flags |= DCACHE_MAY_FREE;
		can_free = false;
	}
	spin_unlock(&dentry->d_lock);
	if (likely(can_free))
		dentry_free(dentry);
}

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 *
 * The inode and parent locks are taken with trylock to respect the
 * lock ordering (i_lock -> d_lock); on contention we back off and ask
 * the caller to retry with the same dentry.
 */
static struct dentry *dentry_kill(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct inode *inode = dentry->d_inode;
	struct dentry *parent = NULL;

	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
		goto failed;

	if (!IS_ROOT(dentry)) {
		parent = dentry->d_parent;
		if (unlikely(!spin_trylock(&parent->d_lock))) {
			if (inode)
				spin_unlock(&inode->i_lock);
			goto failed;
		}
	}

	__dentry_kill(dentry);
	return parent;

failed:
	spin_unlock(&dentry->d_lock);
	/* give the lock holder a chance to make progress before retry */
	cpu_relax();
	return dentry; /* try again with same dentry */
}

585
586
587
588
589
/*
 * Called with dentry->d_lock held.  Returns the parent with both
 * parent->d_lock and dentry->d_lock held (in the correct order), or
 * NULL if the dentry is a root or already marked dead.  Note that
 * d_lock may be dropped and re-taken on the slow path.
 */
static inline struct dentry *lock_parent(struct dentry *dentry)
{
	struct dentry *parent = dentry->d_parent;
	if (IS_ROOT(dentry))
		return NULL;
	/* negative count means lockref_mark_dead() already ran */
	if (unlikely(dentry->d_lockref.count < 0))
		return NULL;
	if (likely(spin_trylock(&parent->d_lock)))
		return parent;
	/* Slow path: drop d_lock so we can take parent first, under RCU */
	rcu_read_lock();
	spin_unlock(&dentry->d_lock);
again:
	parent = ACCESS_ONCE(dentry->d_parent);
	spin_lock(&parent->d_lock);
	/*
	 * We can't blindly lock dentry until we are sure
	 * that we won't violate the locking order.
	 * Any changes of dentry->d_parent must have
	 * been done with parent->d_lock held, so
	 * spin_lock() above is enough of a barrier
	 * for checking if it's still our child.
	 */
	if (unlikely(parent != dentry->d_parent)) {
		spin_unlock(&parent->d_lock);
		goto again;
	}
	rcu_read_unlock();
	if (parent != dentry)
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	else
		parent = NULL;
	return parent;
}

619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
/*
 * Try to do a lockless dput(), and return whether that was successful.
 *
 * If unsuccessful, we return false, having already taken the dentry lock.
 *
 * The caller needs to hold the RCU read lock, so that the dentry is
 * guaranteed to stay around even if the refcount goes down to zero!
 */
static inline bool fast_dput(struct dentry *dentry)
{
	int ret;
	unsigned int d_flags;

	/*
	 * If we have a d_op->d_delete() operation, we should not
	 * let the dentry count go to zero, so use "put_or_lock".
	 */
	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
		return lockref_put_or_lock(&dentry->d_lockref);

	/*
	 * .. otherwise, we can try to just decrement the
	 * lockref optimistically.
	 */
	ret = lockref_put_return(&dentry->d_lockref);

	/*
	 * If the lockref_put_return() failed due to the lock being held
	 * by somebody else, the fast path has failed. We will need to
	 * get the lock, and then check the count again.
	 */
	if (unlikely(ret < 0)) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_lockref.count > 1) {
			dentry->d_lockref.count--;
			spin_unlock(&dentry->d_lock);
			return 1;
		}
		/* last reference: return 0 with d_lock held for the slow path */
		return 0;
	}

	/*
	 * If we weren't the last ref, we're done.
	 */
	if (ret)
		return 1;

	/*
	 * Careful, careful. The reference count went down
	 * to zero, but we don't hold the dentry lock, so
	 * somebody else could get it again, and do another
	 * dput(), and we need to not race with that.
	 *
	 * However, there is a very special and common case
	 * where we don't care, because there is nothing to
	 * do: the dentry is still hashed, it does not have
	 * a 'delete' op, and it's referenced and already on
	 * the LRU list.
	 *
	 * NOTE! Since we aren't locked, these values are
	 * not "stable". However, it is sufficient that at
	 * some point after we dropped the reference the
	 * dentry was hashed and the flags had the proper
	 * value. Other dentry users may have re-gotten
	 * a reference to the dentry and change that, but
	 * our work is done - we can leave the dentry
	 * around with a zero refcount.
	 */
	smp_rmb();
	d_flags = ACCESS_ONCE(dentry->d_flags);
	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;

	/* Nothing to do? Dropping the reference was all we needed? */
	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
		return 1;

	/*
	 * Not the fast normal case? Get the lock. We've already decremented
	 * the refcount, but we'll need to re-check the situation after
	 * getting the lock.
	 */
	spin_lock(&dentry->d_lock);

	/*
	 * Did somebody else grab a reference to it in the meantime, and
	 * we're no longer the last user after all? Alternatively, somebody
	 * else could have killed it and marked it dead. Either way, we
	 * don't need to do anything else.
	 */
	if (dentry->d_lockref.count) {
		spin_unlock(&dentry->d_lock);
		return 1;
	}

	/*
	 * Re-get the reference we optimistically dropped. We hold the
	 * lock, and we just tested that it was zero, so we can just
	 * set it to 1.
	 */
	dentry->d_lockref.count = 1;
	return 0;
}


Linus Torvalds's avatar
Linus Torvalds committed
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
/* 
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release 
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (unlikely(!dentry))
		return;

repeat:
	rcu_read_lock();
	if (likely(fast_dput(dentry))) {
		rcu_read_unlock();
		return;
	}

	/* Slow case: now with the dentry lock held */
	rcu_read_unlock();

	/* Unreachable? Get rid of it */
	if (unlikely(d_unhashed(dentry)))
		goto kill_it;

	if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
		goto kill_it;

	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* keep it: mark recently used and make sure it's on the LRU */
	if (!(dentry->d_flags & DCACHE_REFERENCED))
		dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/* killing may hand us the parent, whose ref we drop next iteration */
	dentry = dentry_kill(dentry);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
Linus Torvalds's avatar
Linus Torvalds committed
790
791


Nick Piggin's avatar
Nick Piggin committed
792
/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

/* Take a reference without holding d_lock (lockref does the locking). */
static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

Nick Piggin's avatar
Nick Piggin committed
803
804
/*
 * Grab a reference to the parent of @dentry.  Tries a lockless
 * RCU-protected lookup first; falls back to taking the parent's
 * d_lock and re-validating if the fast path races with a rename.
 */
struct dentry *dget_parent(struct dentry *dentry)
{
	int gotref;
	struct dentry *ret;

	/*
	 * Do optimistic parent lookup without any
	 * locking.
	 */
	rcu_read_lock();
	ret = ACCESS_ONCE(dentry->d_parent);
	gotref = lockref_get_not_zero(&ret->d_lockref);
	rcu_read_unlock();
	if (likely(gotref)) {
		/* still the parent after we got the ref? then we're done */
		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
			return ret;
		dput(ret);
	}

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		/* renamed under us: retry */
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	/* a live child implies the parent must already be referenced */
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

Linus Torvalds's avatar
Linus Torvalds committed
843
844
845
846
847
848
849
850
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem, or if the directory was renamed and d_revalidate
 * was the first vfs operation to notice.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one.
 */
static struct dentry *__d_find_alias(struct inode *inode)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* remember it, but keep scanning for a better one */
				discon_alias = alias;
			} else {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		/* re-check: the alias lock was dropped during the scan above */
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			__dget_dlock(alias);
			spin_unlock(&alias->d_lock);
			return alias;
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

Nick Piggin's avatar
Nick Piggin committed
891
struct dentry *d_find_alias(struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
892
{
893
894
	struct dentry *de = NULL;

895
	if (!hlist_empty(&inode->i_dentry)) {
896
		spin_lock(&inode->i_lock);
897
		de = __d_find_alias(inode);
898
		spin_unlock(&inode->i_lock);
899
	}
Linus Torvalds's avatar
Linus Torvalds committed
900
901
	return de;
}
902
EXPORT_SYMBOL(d_find_alias);
Linus Torvalds's avatar
Linus Torvalds committed
903
904
905
906
907
908
909

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			struct dentry *parent = lock_parent(dentry);
			/* re-check: lock_parent() may have dropped d_lock */
			if (likely(!dentry->d_lockref.count)) {
				__dentry_kill(dentry);
				dput(parent);
				/* list was modified under us: rescan from the top */
				goto restart;
			}
			if (parent)
				spin_unlock(&parent->d_lock);
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
Linus Torvalds's avatar
Linus Torvalds committed
930

931
static void shrink_dentry_list(struct list_head *list)
Linus Torvalds's avatar
Linus Torvalds committed
932
{
Al Viro's avatar
Al Viro committed
933
	struct dentry *dentry, *parent;
934

935
	while (!list_empty(list)) {
936
		struct inode *inode;
937
		dentry = list_entry(list->prev, struct dentry, d_lru);
938
		spin_lock(&dentry->d_lock);
939
940
		parent = lock_parent(dentry);

941
942
943
944
945
		/*
		 * The dispose list is isolated and dentries are not accounted
		 * to the LRU here, so we can simply remove it from the list
		 * here regardless of whether it is referenced or not.
		 */
946
		d_shrink_del(dentry);
947

Linus Torvalds's avatar
Linus Torvalds committed
948
949
		/*
		 * We found an inuse dentry which was not removed from
950
		 * the LRU because of laziness during lookup. Do not free it.
Linus Torvalds's avatar
Linus Torvalds committed
951
		 */
952
		if (dentry->d_lockref.count > 0) {
953
			spin_unlock(&dentry->d_lock);
954
955
			if (parent)
				spin_unlock(&parent->d_lock);
Linus Torvalds's avatar
Linus Torvalds committed
956
957
			continue;
		}
958

959
960
961
962

		if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
			bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
			spin_unlock(&dentry->d_lock);
963
964
			if (parent)
				spin_unlock(&parent->d_lock);
965
966
967
968
969
			if (can_free)
				dentry_free(dentry);
			continue;
		}

970
971
		inode = dentry->d_inode;
		if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
972
			d_shrink_add(dentry, list);
973
			spin_unlock(&dentry->d_lock);
974
975
			if (parent)
				spin_unlock(&parent->d_lock);
Al Viro's avatar
Al Viro committed
976
			continue;
977
		}
978
979

		__dentry_kill(dentry);
980

Al Viro's avatar
Al Viro committed
981
982
983
984
985
986
987
		/*
		 * We need to prune ancestors too. This is necessary to prevent
		 * quadratic behavior of shrink_dcache_parent(), but is also
		 * expected to be beneficial in reducing dentry cache
		 * fragmentation.
		 */
		dentry = parent;
988
989
990
991
992
993
994
995
996
997
998
999
1000
		while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
			parent = lock_parent(dentry);
			if (dentry->d_lockref.count != 1) {
				dentry->d_lockref.count--;
				spin_unlock(&dentry->d_lock);
				if (parent)
					spin_unlock(&parent->d_lock);
				break;
			}
			inode = dentry->d_inode;	/* can't be NULL */
			if (unlikely(!spin_trylock(&inode->i_lock))) {
				spin_unlock(&dentry->d_lock);
				if (parent)
For faster browsing, not all history is shown. View entire blame