Commit 67cf5b09 authored by Tao Ma's avatar Tao Ma Committed by Theodore Ts'o
Browse files

ext4: add the basic function for inline data support



Implement inline data with xattr.

Now we use "system.data" to store xattr, and the xattr will
be extended if the i_size is increased while we don't release
the space during truncate.

Signed-off-by: default avatarTao Ma <boyu.mt@taobao.com>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 879b3825
......@@ -9,6 +9,6 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
mmp.o indirect.o extents_status.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o inline.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
......@@ -402,6 +402,7 @@ struct flex_groups {
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
......@@ -458,6 +459,7 @@ enum {
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};
......@@ -504,6 +506,7 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(EXTENTS);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(INLINE_DATA);
CHECK_FLAG_VALUE(RESERVED);
}
......@@ -918,6 +921,10 @@ struct ext4_inode_info {
/* on-disk additional length */
__u16 i_extra_isize;
/* Indicate the inline data space. */
u16 i_inline_off;
u16 i_inline_size;
#ifdef CONFIG_QUOTA
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
......@@ -1376,6 +1383,7 @@ enum {
EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
......@@ -1497,7 +1505,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
......
/*
* Copyright (c) 2012 Taobao.
* Written by Tao Ma <boyu.mt@taobao.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#define EXT4_XATTR_SYSTEM_DATA "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
int ext4_get_inline_size(struct inode *inode)
{
if (EXT4_I(inode)->i_inline_off)
return EXT4_I(inode)->i_inline_size;
return 0;
}
static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_iloc *iloc)
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
int free, min_offs;
min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
EXT4_GOOD_OLD_INODE_SIZE -
EXT4_I(inode)->i_extra_isize -
sizeof(struct ext4_xattr_ibody_header);
/*
* We need to subtract another sizeof(__u32) since an in-inode xattr
* needs an empty 4 bytes to indicate the gap between the xattr entry
* and the name/value pair.
*/
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
return EXT4_XATTR_SIZE(min_offs -
EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
EXT4_XATTR_ROUND - sizeof(__u32));
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_block && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
if (EXT4_I(inode)->i_inline_off) {
entry = (struct ext4_xattr_entry *)
((void *)raw_inode + EXT4_I(inode)->i_inline_off);
free += le32_to_cpu(entry->e_value_size);
goto out;
}
free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));
if (free > EXT4_XATTR_ROUND)
free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
else
free = 0;
out:
return free;
}
/*
* Get the maximum size we now can store in an inode.
* If we can't find the space for a xattr entry, don't use the space
* of the extents since we have no space to indicate the inline data.
*/
int ext4_get_max_inline_size(struct inode *inode)
{
int error, max_inline_size;
struct ext4_iloc iloc;
if (EXT4_I(inode)->i_extra_isize == 0)
return 0;
error = ext4_get_inode_loc(inode, &iloc);
if (error) {
ext4_error_inode(inode, __func__, __LINE__, 0,
"can't get inode location %lu",
inode->i_ino);
return 0;
}
down_read(&EXT4_I(inode)->xattr_sem);
max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
up_read(&EXT4_I(inode)->xattr_sem);
brelse(iloc.bh);
if (!max_inline_size)
return 0;
return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}
int ext4_has_inline_data(struct inode *inode)
{
return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
EXT4_I(inode)->i_inline_off;
}
/*
* this function does not take xattr_sem, which is OK because it is
* currently only used in a code path coming form ext4_iget, before
* the new inode has been unlocked
*/
int ext4_find_inline_data_nolock(struct inode *inode)
{
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
};
struct ext4_xattr_info i = {
.name_index = EXT4_XATTR_INDEX_SYSTEM,
.name = EXT4_XATTR_SYSTEM_DATA,
};
int error;
if (EXT4_I(inode)->i_extra_isize == 0)
return 0;
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
return error;
error = ext4_xattr_ibody_find(inode, &i, &is);
if (error)
goto out;
if (!is.s.not_found) {
EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);
return error;
}
static int ext4_read_inline_data(struct inode *inode, void *buffer,
unsigned int len,
struct ext4_iloc *iloc)
{
struct ext4_xattr_entry *entry;
struct ext4_xattr_ibody_header *header;
int cp_len = 0;
struct ext4_inode *raw_inode;
if (!len)
return 0;
BUG_ON(len > EXT4_I(inode)->i_inline_size);
cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
len : EXT4_MIN_INLINE_DATA_SIZE;
raw_inode = ext4_raw_inode(iloc);
memcpy(buffer, (void *)(raw_inode->i_block), cp_len);
len -= cp_len;
buffer += cp_len;
if (!len)
goto out;
header = IHDR(inode, raw_inode);
entry = (struct ext4_xattr_entry *)((void *)raw_inode +
EXT4_I(inode)->i_inline_off);
len = min_t(unsigned int, len,
(unsigned int)le32_to_cpu(entry->e_value_size));
memcpy(buffer,
(void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
cp_len += len;
out:
return cp_len;
}
/*
* write the buffer to the inline inode.
* If 'create' is set, we don't need to do the extra copy in the xattr
* value since it is already handled by ext4_xattr_ibody_set. That saves
* us one memcpy.
*/
void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
void *buffer, loff_t pos, unsigned int len)
{
struct ext4_xattr_entry *entry;
struct ext4_xattr_ibody_header *header;
struct ext4_inode *raw_inode;
int cp_len = 0;
BUG_ON(!EXT4_I(inode)->i_inline_off);
BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);
raw_inode = ext4_raw_inode(iloc);
buffer += pos;
if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
EXT4_MIN_INLINE_DATA_SIZE - pos : len;
memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);
len -= cp_len;
buffer += cp_len;
pos += cp_len;
}
if (!len)
return;
pos -= EXT4_MIN_INLINE_DATA_SIZE;
header = IHDR(inode, raw_inode);
entry = (struct ext4_xattr_entry *)((void *)raw_inode +
EXT4_I(inode)->i_inline_off);
memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
buffer, len);
}
static int ext4_create_inline_data(handle_t *handle,
struct inode *inode, unsigned len)
{
int error;
void *value = NULL;
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
};
struct ext4_xattr_info i = {
.name_index = EXT4_XATTR_INDEX_SYSTEM,
.name = EXT4_XATTR_SYSTEM_DATA,
};
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
return error;
error = ext4_journal_get_write_access(handle, is.iloc.bh);
if (error)
goto out;
if (len > EXT4_MIN_INLINE_DATA_SIZE) {
value = (void *)empty_zero_page;
len -= EXT4_MIN_INLINE_DATA_SIZE;
} else {
value = "";
len = 0;
}
/* Insert the the xttr entry. */
i.value = value;
i.value_len = len;
error = ext4_xattr_ibody_find(inode, &i, &is);
if (error)
goto out;
BUG_ON(!is.s.not_found);
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error) {
if (error == -ENOSPC)
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
goto out;
}
memset((void *)ext4_raw_inode(&is.iloc)->i_block,
0, EXT4_MIN_INLINE_DATA_SIZE);
EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
out:
brelse(is.iloc.bh);
return error;
}
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
unsigned int len)
{
int error;
void *value = NULL;
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
};
struct ext4_xattr_info i = {
.name_index = EXT4_XATTR_INDEX_SYSTEM,
.name = EXT4_XATTR_SYSTEM_DATA,
};
/* If the old space is ok, write the data directly. */
if (len <= EXT4_I(inode)->i_inline_size)
return 0;
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
return error;
error = ext4_xattr_ibody_find(inode, &i, &is);
if (error)
goto out;
BUG_ON(is.s.not_found);
len -= EXT4_MIN_INLINE_DATA_SIZE;
value = kzalloc(len, GFP_NOFS);
if (!value)
goto out;
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
if (error == -ENODATA)
goto out;
error = ext4_journal_get_write_access(handle, is.iloc.bh);
if (error)
goto out;
/* Update the xttr entry. */
i.value = value;
i.value_len = len;
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
out:
kfree(value);
brelse(is.iloc.bh);
return error;
}
int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len)
{
int ret, size;
struct ext4_inode_info *ei = EXT4_I(inode);
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
return -ENOSPC;
size = ext4_get_max_inline_size(inode);
if (size < len)
return -ENOSPC;
down_write(&EXT4_I(inode)->xattr_sem);
if (ei->i_inline_off)
ret = ext4_update_inline_data(handle, inode, len);
else
ret = ext4_create_inline_data(handle, inode, len);
up_write(&EXT4_I(inode)->xattr_sem);
return ret;
}
static int ext4_destroy_inline_data_nolock(handle_t *handle,
struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_xattr_ibody_find is = {
.s = { .not_found = 0, },
};
struct ext4_xattr_info i = {
.name_index = EXT4_XATTR_INDEX_SYSTEM,
.name = EXT4_XATTR_SYSTEM_DATA,
.value = NULL,
.value_len = 0,
};
int error;
if (!ei->i_inline_off)
return 0;
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
return error;
error = ext4_xattr_ibody_find(inode, &i, &is);
if (error)
goto out;
error = ext4_journal_get_write_access(handle, is.iloc.bh);
if (error)
goto out;
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
memset((void *)ext4_raw_inode(&is.iloc)->i_block,
0, EXT4_MIN_INLINE_DATA_SIZE);
if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_INCOMPAT_EXTENTS)) {
if (S_ISDIR(inode->i_mode) ||
S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_ext_tree_init(handle, inode);
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
EXT4_I(inode)->i_inline_off = 0;
EXT4_I(inode)->i_inline_size = 0;
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
brelse(is.iloc.bh);
if (error == -ENODATA)
error = 0;
return error;
}
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
int ret;
down_write(&EXT4_I(inode)->xattr_sem);
ret = ext4_destroy_inline_data_nolock(handle, inode);
up_write(&EXT4_I(inode)->xattr_sem);
return ret;
}
......@@ -3706,8 +3706,10 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
{
__le32 *magic = (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
ext4_find_inline_data_nolock(inode);
}
}
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
......@@ -3780,6 +3782,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ei->i_inline_off = 0;
ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
......
......@@ -21,6 +21,7 @@
#define EXT4_XATTR_INDEX_TRUSTED 4
#define EXT4_XATTR_INDEX_LUSTRE 5
#define EXT4_XATTR_INDEX_SECURITY 6
#define EXT4_XATTR_INDEX_SYSTEM 7
struct ext4_xattr_header {
__le32 h_magic; /* magic number for identification */
......@@ -125,6 +126,19 @@ extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
extern int ext4_has_inline_data(struct inode *inode);
extern int ext4_get_inline_size(struct inode *inode);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern void ext4_write_inline_data(struct inode *inode,
struct ext4_iloc *iloc,
void *buffer, loff_t pos,
unsigned int len);
extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
# else /* CONFIG_EXT4_FS_XATTR */
static inline int
......@@ -201,6 +215,46 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index,
return -EOPNOTSUPP;
}
static inline int ext4_find_inline_data_nolock(struct inode *inode)
{
return 0;
}
static inline int ext4_has_inline_data(struct inode *inode)
{
return 0;
}
static inline int ext4_get_inline_size(struct inode *inode)
{
return 0;
}
static inline int ext4_get_max_inline_size(struct inode *inode)
{
return 0;
}
static inline void ext4_write_inline_data(struct inode *inode,
struct ext4_iloc *iloc,
void *buffer, loff_t pos,
unsigned int len)
{
return;
}
static inline int ext4_init_inline_data(handle_t *handle,
struct inode *inode,
unsigned int len)
{
return 0;
}
static inline int ext4_destroy_inline_data(handle_t *handle,
struct inode *inode)
{
return 0;
}
# endif /* CONFIG_EXT4_FS_XATTR */
#ifdef CONFIG_EXT4_FS_SECURITY
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment