mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-09-08 01:08:03 -04:00

commit f6dfaef42e
Fixed MTP to work with TWRP

50820 changed files with 20846062 additions and 0 deletions
fs/ext4/Kconfig (new normal file, 75 lines)
@@ -0,0 +1,75 @@
config EXT4_FS
    tristate "The Extended 4 (ext4) filesystem"
    select JBD2
    select CRC16
    select CRYPTO
    select CRYPTO_CRC32C
    help
      This is the next generation of the ext3 filesystem.

      Unlike the change from ext2 filesystem to ext3 filesystem,
      the on-disk format of ext4 is not forwards compatible with
      ext3; it is based on extent maps and it supports 48-bit
      physical block numbers. The ext4 filesystem also supports delayed
      allocation, persistent preallocation, high resolution time stamps,
      and a number of other features to improve performance and speed
      up fsck time. For more information, please see the web pages at
      http://ext4.wiki.kernel.org.

      The ext4 filesystem will support mounting an ext3
      filesystem; while there will be some performance gains from
      the delayed allocation and inode table readahead, the best
      performance gains will require enabling ext4 features in the
      filesystem, or formatting a new filesystem as an ext4
      filesystem initially.

      To compile this file system support as a module, choose M here. The
      module will be called ext4.

      If unsure, say N.

config EXT4_USE_FOR_EXT23
    bool "Use ext4 for ext2/ext3 file systems"
    depends on EXT4_FS
    depends on EXT3_FS=n || EXT2_FS=n
    default y
    help
      Allow the ext4 file system driver code to be used for ext2 or
      ext3 file system mounts. This allows users to reduce their
      compiled kernel size by using one file system driver for
      ext2, ext3, and ext4 file systems.

config EXT4_FS_POSIX_ACL
    bool "Ext4 POSIX Access Control Lists"
    depends on EXT4_FS
    select FS_POSIX_ACL
    help
      POSIX Access Control Lists (ACLs) support permissions for users and
      groups beyond the owner/group/world scheme.

      To learn more about Access Control Lists, visit the POSIX ACLs for
      Linux website <http://acl.bestbits.at/>.

      If you don't know what Access Control Lists are, say N.

config EXT4_FS_SECURITY
    bool "Ext4 Security Labels"
    depends on EXT4_FS
    help
      Security labels support alternative access control models
      implemented by security modules like SELinux. This option
      enables an extended attribute handler for file security
      labels in the ext4 filesystem.

      If you are not using a security module that requires using
      extended attributes for file security labels, say N.

config EXT4_DEBUG
    bool "EXT4 debugging support"
    depends on EXT4_FS
    help
      Enables run-time debugging support for the ext4 filesystem.

      If you select Y here, then you will be able to turn on debugging
      with a command such as:
        echo 1 > /sys/module/ext4/parameters/mballoc_debug
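These symbols surface in the kernel's .config with a CONFIG_ prefix. As a rough illustration only (the values below are hypothetical, not taken from this device's actual defconfig), a build that wants ext4 built in with ACL and security-label support would carry a fragment like:

    CONFIG_EXT4_FS=y
    CONFIG_EXT4_USE_FOR_EXT23=y
    CONFIG_EXT4_FS_POSIX_ACL=y
    CONFIG_EXT4_FS_SECURITY=y
    # CONFIG_EXT4_DEBUG is not set

Note that EXT4_USE_FOR_EXT23 defaults to y, but it is only offered when EXT4_FS is enabled and at least one of ext2/ext3 is not built, so trees like this one typically serve all three on-disk formats from the single ext4 driver.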
fs/ext4/Makefile (new normal file, 18 lines)
@@ -0,0 +1,18 @@
#
# Makefile for the linux ext4-filesystem routines.
#

ifeq ($(SEC_BUILD_CONF_QUICK_DMVERITY),true)
EXTRA_CFLAGS += -DVERIFY_META_ONLY=true
endif

obj-$(CONFIG_EXT4_FS) += ext4.o

ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
        ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
        ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
        mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
        xattr_trusted.o inline.o

ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY)  += xattr_security.o
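The ifeq block is the Samsung addition here: it injects -DVERIFY_META_ONLY=true into the ext4 compile when the build is configured for quick dm-verity, which gates the VERIFY_META_ONLY paths in block_validity.c below. Since ordinary make-variable semantics apply, the flag can be set on the command line or exported in the environment; a hypothetical invocation (the real Samsung build scripts for this tree are not shown in this diff):

    # hypothetical: rebuild only fs/ext4/ with the quick-dmverity define active
    make ARCH=arm64 SEC_BUILD_CONF_QUICK_DMVERITY=true fs/ext4/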
fs/ext4/acl.c (new normal file, 288 lines)
@@ -0,0 +1,288 @@
/*
 * linux/fs/ext4/acl.c
 *
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 */

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "acl.h"

/*
 * Convert from filesystem to in-memory representation.
 */
static struct posix_acl *
ext4_acl_from_disk(const void *value, size_t size)
{
    const char *end = (char *)value + size;
    int n, count;
    struct posix_acl *acl;

    if (!value)
        return NULL;
    if (size < sizeof(ext4_acl_header))
        return ERR_PTR(-EINVAL);
    if (((ext4_acl_header *)value)->a_version !=
        cpu_to_le32(EXT4_ACL_VERSION))
        return ERR_PTR(-EINVAL);
    value = (char *)value + sizeof(ext4_acl_header);
    count = ext4_acl_count(size);
    if (count < 0)
        return ERR_PTR(-EINVAL);
    if (count == 0)
        return NULL;
    acl = posix_acl_alloc(count, GFP_NOFS);
    if (!acl)
        return ERR_PTR(-ENOMEM);
    for (n = 0; n < count; n++) {
        ext4_acl_entry *entry =
            (ext4_acl_entry *)value;
        if ((char *)value + sizeof(ext4_acl_entry_short) > end)
            goto fail;
        acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
        acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);

        switch (acl->a_entries[n].e_tag) {
        case ACL_USER_OBJ:
        case ACL_GROUP_OBJ:
        case ACL_MASK:
        case ACL_OTHER:
            value = (char *)value +
                sizeof(ext4_acl_entry_short);
            break;

        case ACL_USER:
            value = (char *)value + sizeof(ext4_acl_entry);
            if ((char *)value > end)
                goto fail;
            acl->a_entries[n].e_uid =
                make_kuid(&init_user_ns,
                          le32_to_cpu(entry->e_id));
            break;
        case ACL_GROUP:
            value = (char *)value + sizeof(ext4_acl_entry);
            if ((char *)value > end)
                goto fail;
            acl->a_entries[n].e_gid =
                make_kgid(&init_user_ns,
                          le32_to_cpu(entry->e_id));
            break;

        default:
            goto fail;
        }
    }
    if (value != end)
        goto fail;
    return acl;

fail:
    posix_acl_release(acl);
    return ERR_PTR(-EINVAL);
}

/*
 * Convert from in-memory to filesystem representation.
 */
static void *
ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
{
    ext4_acl_header *ext_acl;
    char *e;
    size_t n;

    *size = ext4_acl_size(acl->a_count);
    ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
            sizeof(ext4_acl_entry), GFP_NOFS);
    if (!ext_acl)
        return ERR_PTR(-ENOMEM);
    ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
    e = (char *)ext_acl + sizeof(ext4_acl_header);
    for (n = 0; n < acl->a_count; n++) {
        const struct posix_acl_entry *acl_e = &acl->a_entries[n];
        ext4_acl_entry *entry = (ext4_acl_entry *)e;
        entry->e_tag = cpu_to_le16(acl_e->e_tag);
        entry->e_perm = cpu_to_le16(acl_e->e_perm);
        switch (acl_e->e_tag) {
        case ACL_USER:
            entry->e_id = cpu_to_le32(
                from_kuid(&init_user_ns, acl_e->e_uid));
            e += sizeof(ext4_acl_entry);
            break;
        case ACL_GROUP:
            entry->e_id = cpu_to_le32(
                from_kgid(&init_user_ns, acl_e->e_gid));
            e += sizeof(ext4_acl_entry);
            break;

        case ACL_USER_OBJ:
        case ACL_GROUP_OBJ:
        case ACL_MASK:
        case ACL_OTHER:
            e += sizeof(ext4_acl_entry_short);
            break;

        default:
            goto fail;
        }
    }
    return (char *)ext_acl;

fail:
    kfree(ext_acl);
    return ERR_PTR(-EINVAL);
}

/*
 * Inode operation get_posix_acl().
 *
 * inode->i_mutex: don't care
 */
struct posix_acl *
ext4_get_acl(struct inode *inode, int type)
{
    int name_index;
    char *value = NULL;
    struct posix_acl *acl;
    int retval;

    switch (type) {
    case ACL_TYPE_ACCESS:
        name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
        break;
    case ACL_TYPE_DEFAULT:
        name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
        break;
    default:
        BUG();
    }
    retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
    if (retval > 0) {
        value = kmalloc(retval, GFP_NOFS);
        if (!value)
            return ERR_PTR(-ENOMEM);
        retval = ext4_xattr_get(inode, name_index, "", value, retval);
    }
    if (retval > 0)
        acl = ext4_acl_from_disk(value, retval);
    else if (retval == -ENODATA || retval == -ENOSYS)
        acl = NULL;
    else
        acl = ERR_PTR(retval);
    kfree(value);

    if (!IS_ERR(acl))
        set_cached_acl(inode, type, acl);

    return acl;
}

/*
 * Set the access or default ACL of an inode.
 *
 * inode->i_mutex: down unless called from ext4_new_inode
 */
static int
__ext4_set_acl(handle_t *handle, struct inode *inode, int type,
               struct posix_acl *acl)
{
    int name_index;
    void *value = NULL;
    size_t size = 0;
    int error;

    switch (type) {
    case ACL_TYPE_ACCESS:
        name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
        if (acl) {
            error = posix_acl_equiv_mode(acl, &inode->i_mode);
            if (error < 0)
                return error;
            else {
                inode->i_ctime = ext4_current_time(inode);
                ext4_mark_inode_dirty(handle, inode);
                if (error == 0)
                    acl = NULL;
            }
        }
        break;

    case ACL_TYPE_DEFAULT:
        name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
        if (!S_ISDIR(inode->i_mode))
            return acl ? -EACCES : 0;
        break;

    default:
        return -EINVAL;
    }
    if (acl) {
        value = ext4_acl_to_disk(acl, &size);
        if (IS_ERR(value))
            return (int)PTR_ERR(value);
    }

    error = ext4_xattr_set_handle(handle, inode, name_index, "",
                                  value, size, 0);

    kfree(value);
    if (!error)
        set_cached_acl(inode, type, acl);

    return error;
}

int
ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
    handle_t *handle;
    int error, retries = 0;

retry:
    handle = ext4_journal_start(inode, EXT4_HT_XATTR,
                                ext4_jbd2_credits_xattr(inode));
    if (IS_ERR(handle))
        return PTR_ERR(handle);

    error = __ext4_set_acl(handle, inode, type, acl);
    ext4_journal_stop(handle);
    if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
        goto retry;
    return error;
}

/*
 * Initialize the ACLs of a new inode. Called from ext4_new_inode.
 *
 * dir->i_mutex: down
 * inode->i_mutex: up (access to inode is still exclusive)
 */
int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
    struct posix_acl *default_acl, *acl;
    int error;

    error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
    if (error)
        return error;

    if (default_acl) {
        error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
                               default_acl);
        posix_acl_release(default_acl);
    }
    if (acl) {
        if (!error)
            error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
                                   acl);
        posix_acl_release(acl);
    }
    return error;
}
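Both conversion helpers above follow the kernel's error-pointer idiom: a single return value is either NULL ("no ACL"), a real object pointer, or a small negative errno encoded in the pointer itself via ERR_PTR(), which callers unpack with IS_ERR()/PTR_ERR() (as ext4_get_acl() does). A minimal userspace sketch of that idiom, using stand-in definitions rather than the kernel's headers:

    #include <stdio.h>
    #include <errno.h>

    /* Userspace stand-ins for the kernel's ERR_PTR/IS_ERR/PTR_ERR helpers.
     * The kernel reserves the top 4095 values of the address space for
     * encoded errno values (MAX_ERRNO is 4095 there). */
    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* A parser in the style of ext4_acl_from_disk(): NULL means "no ACL",
     * an ERR_PTR means "invalid input", anything else is a real object. */
    static void *parse(const void *value, size_t size)
    {
        if (!value)
            return NULL;
        if (size < 4)               /* too small for any header */
            return ERR_PTR(-EINVAL);
        return (void *)value;       /* pretend we decoded something */
    }

    int main(void)
    {
        char buf[16] = { 0 };
        void *p = parse(buf, 2);

        if (IS_ERR(p))
            printf("error: %ld\n", PTR_ERR(p));   /* prints -22 (EINVAL) */
        else
            printf("ok: %p\n", p);
        return 0;
    }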
fs/ext4/acl.h (new normal file, 72 lines)
@@ -0,0 +1,72 @@
/*
  File: fs/ext4/acl.h

  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
*/

#include <linux/posix_acl_xattr.h>

#define EXT4_ACL_VERSION 0x0001

typedef struct {
    __le16 e_tag;
    __le16 e_perm;
    __le32 e_id;
} ext4_acl_entry;

typedef struct {
    __le16 e_tag;
    __le16 e_perm;
} ext4_acl_entry_short;

typedef struct {
    __le32 a_version;
} ext4_acl_header;

static inline size_t ext4_acl_size(int count)
{
    if (count <= 4) {
        return sizeof(ext4_acl_header) +
               count * sizeof(ext4_acl_entry_short);
    } else {
        return sizeof(ext4_acl_header) +
               4 * sizeof(ext4_acl_entry_short) +
               (count - 4) * sizeof(ext4_acl_entry);
    }
}

static inline int ext4_acl_count(size_t size)
{
    ssize_t s;
    size -= sizeof(ext4_acl_header);
    s = size - 4 * sizeof(ext4_acl_entry_short);
    if (s < 0) {
        if (size % sizeof(ext4_acl_entry_short))
            return -1;
        return size / sizeof(ext4_acl_entry_short);
    } else {
        if (s % sizeof(ext4_acl_entry))
            return -1;
        return s / sizeof(ext4_acl_entry) + 4;
    }
}

#ifdef CONFIG_EXT4_FS_POSIX_ACL

/* acl.c */
struct posix_acl *ext4_get_acl(struct inode *inode, int type);
int ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);

#else  /* CONFIG_EXT4_FS_POSIX_ACL */
#include <linux/sched.h>
#define ext4_get_acl NULL
#define ext4_set_acl NULL

static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
    return 0;
}
#endif  /* CONFIG_EXT4_FS_POSIX_ACL */
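The size/count pair encodes a small space optimization: the first four entries of an on-disk ACL (in practice the USER_OBJ/GROUP_OBJ/MASK/OTHER entries, which carry no e_id) are stored in the 4-byte short form, and only entries beyond that use the full 8-byte form. A userspace sketch that mirrors the two inline functions with fixed-width stand-ins for __le16/__le32 and checks that they invert each other:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/types.h>

    /* Fixed-width stand-ins for the kernel's little-endian types. */
    typedef struct { uint16_t e_tag, e_perm; uint32_t e_id; } acl_entry;  /* 8 bytes */
    typedef struct { uint16_t e_tag, e_perm; } acl_entry_short;           /* 4 bytes */
    typedef struct { uint32_t a_version; } acl_header;                    /* 4 bytes */

    /* Same math as ext4_acl_size(): up to 4 entries use the short form. */
    static size_t acl_size(int count)
    {
        if (count <= 4)
            return sizeof(acl_header) + count * sizeof(acl_entry_short);
        return sizeof(acl_header) + 4 * sizeof(acl_entry_short) +
               (count - 4) * sizeof(acl_entry);
    }

    /* Same math as ext4_acl_count(): recover the entry count from a size,
     * returning -1 when the size cannot be a valid encoding. */
    static int acl_count(size_t size)
    {
        ssize_t s;

        size -= sizeof(acl_header);
        s = size - 4 * sizeof(acl_entry_short);
        if (s < 0) {
            if (size % sizeof(acl_entry_short))
                return -1;
            return size / sizeof(acl_entry_short);
        }
        if (s % sizeof(acl_entry))
            return -1;
        return s / sizeof(acl_entry) + 4;
    }

    int main(void)
    {
        int n;

        for (n = 0; n <= 8; n++) {
            size_t sz = acl_size(n);
            assert(acl_count(sz) == n);       /* round-trip holds */
            printf("count=%d -> %zu bytes\n", n, sz);
        }
        return 0;
    }

For example, 4 entries take 4 + 4*4 = 20 bytes, while 5 entries take 4 + 4*4 + 8 = 28 bytes.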
fs/ext4/balloc.c (new normal file, 883 lines)
@@ -0,0 +1,883 @@
/*
 * linux/fs/ext4/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 *     David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/time.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "mballoc.h"

#include <trace/events/ext4.h>

static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
                                            ext4_group_t block_group);
/*
 * balloc.c contains the blocks allocation and deallocation routines
 */

/*
 * Calculate block group number for a given block number
 */
ext4_group_t ext4_get_group_number(struct super_block *sb,
                                   ext4_fsblk_t block)
{
    ext4_group_t group;

    if (test_opt2(sb, STD_GROUP_SIZE))
        group = (block -
                 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
                (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
    else
        ext4_get_group_no_and_offset(sb, block, &group, NULL);
    return group;
}

/*
 * Calculate the block group number and offset into the block/cluster
 * allocation bitmap, given a block number
 */
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
        ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{
    struct ext4_super_block *es = EXT4_SB(sb)->s_es;
    ext4_grpblk_t offset;

    blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
    offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
        EXT4_SB(sb)->s_cluster_bits;
    if (offsetp)
        *offsetp = offset;
    if (blockgrpp)
        *blockgrpp = blocknr;
}

/*
 * Check whether the 'block' lives within the 'block_group'. Returns 1 if so
 * and 0 otherwise.
 */
static inline int ext4_block_in_group(struct super_block *sb,
                                      ext4_fsblk_t block,
                                      ext4_group_t block_group)
{
    ext4_group_t actual_group;

    actual_group = ext4_get_group_number(sb, block);
    return (actual_group == block_group) ? 1 : 0;
}

/* Return the number of clusters used for file system metadata; this
 * represents the overhead needed by the file system.
 */
static unsigned ext4_num_overhead_clusters(struct super_block *sb,
                                           ext4_group_t block_group,
                                           struct ext4_group_desc *gdp)
{
    unsigned num_clusters;
    int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
    ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
    ext4_fsblk_t itbl_blk;
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    /* This is the number of clusters used by the superblock,
     * block group descriptors, and reserved block group
     * descriptor blocks */
    num_clusters = ext4_num_base_meta_clusters(sb, block_group);

    /*
     * For the allocation bitmaps and inode table, we first need
     * to check to see if the block is in the block group.  If it
     * is, then check to see if the cluster is already accounted
     * for in the clusters used for the base metadata cluster, or
     * if we can increment the base metadata cluster to include
     * that block.  Otherwise, we will have to track the cluster
     * used for the allocation bitmap or inode table explicitly.
     * Normally all of these blocks are contiguous, so the special
     * case handling shouldn't be necessary except for *very*
     * unusual file system layouts.
     */
    if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
        block_cluster = EXT4_B2C(sbi,
                                 ext4_block_bitmap(sb, gdp) - start);
        if (block_cluster < num_clusters)
            block_cluster = -1;
        else if (block_cluster == num_clusters) {
            num_clusters++;
            block_cluster = -1;
        }
    }

    if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
        inode_cluster = EXT4_B2C(sbi,
                                 ext4_inode_bitmap(sb, gdp) - start);
        if (inode_cluster < num_clusters)
            inode_cluster = -1;
        else if (inode_cluster == num_clusters) {
            num_clusters++;
            inode_cluster = -1;
        }
    }

    itbl_blk = ext4_inode_table(sb, gdp);
    for (i = 0; i < sbi->s_itb_per_group; i++) {
        if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
            c = EXT4_B2C(sbi, itbl_blk + i - start);
            if ((c < num_clusters) || (c == inode_cluster) ||
                (c == block_cluster) || (c == itbl_cluster))
                continue;
            if (c == num_clusters) {
                num_clusters++;
                continue;
            }
            num_clusters++;
            itbl_cluster = c;
        }
    }

    if (block_cluster != -1)
        num_clusters++;
    if (inode_cluster != -1)
        num_clusters++;

    return num_clusters;
}

static unsigned int num_clusters_in_group(struct super_block *sb,
                                          ext4_group_t block_group)
{
    unsigned int blocks;

    if (block_group == ext4_get_groups_count(sb) - 1) {
        /*
         * Even though mke2fs always initializes the first and
         * last group, just in case some other tool was used,
         * we need to make sure we calculate the right free
         * blocks.
         */
        blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
            ext4_group_first_block_no(sb, block_group);
    } else
        blocks = EXT4_BLOCKS_PER_GROUP(sb);
    return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
}

/* Initializes an uninitialized block bitmap */
static int ext4_init_block_bitmap(struct super_block *sb,
                                  struct buffer_head *bh,
                                  ext4_group_t block_group,
                                  struct ext4_group_desc *gdp)
{
    unsigned int bit, bit_max;
    struct ext4_sb_info *sbi = EXT4_SB(sb);
    ext4_fsblk_t start, tmp;
    int flex_bg = 0;
    struct ext4_group_info *grp;

    J_ASSERT_BH(bh, buffer_locked(bh));

    /* If checksum is bad mark all blocks used to prevent allocation
     * essentially implementing a per-group read-only flag. */
    if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
        grp = ext4_get_group_info(sb, block_group);
        if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
            percpu_counter_sub(&sbi->s_freeclusters_counter,
                               grp->bb_free);
        set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
        if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
            int count;
            count = ext4_free_inodes_count(sb, gdp);
            percpu_counter_sub(&sbi->s_freeinodes_counter,
                               count);
        }
        set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
        return -EIO;
    }
    memset(bh->b_data, 0, sb->s_blocksize);

    bit_max = ext4_num_base_meta_clusters(sb, block_group);
    for (bit = 0; bit < bit_max; bit++)
        ext4_set_bit(bit, bh->b_data);

    start = ext4_group_first_block_no(sb, block_group);

    if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
        flex_bg = 1;

    /* Set bits for block and inode bitmaps, and inode table */
    tmp = ext4_block_bitmap(sb, gdp);
    if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
        ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

    tmp = ext4_inode_bitmap(sb, gdp);
    if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
        ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

    tmp = ext4_inode_table(sb, gdp);
    for (; tmp < ext4_inode_table(sb, gdp) +
            sbi->s_itb_per_group; tmp++) {
        if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
            ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
    }

    /*
     * Also if the number of blocks within the group is less than
     * the blocksize * 8 ( which is the size of bitmap ), set rest
     * of the block bitmap to 1
     */
    ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
                         sb->s_blocksize * 8, bh->b_data);
    ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
    ext4_group_desc_csum_set(sb, block_group, gdp);
    return 0;
}

/* Return the number of free blocks in a block group. It is used when
 * the block bitmap is uninitialized, so we can't just count the bits
 * in the bitmap. */
unsigned ext4_free_clusters_after_init(struct super_block *sb,
                                       ext4_group_t block_group,
                                       struct ext4_group_desc *gdp)
{
    return num_clusters_in_group(sb, block_group) -
           ext4_num_overhead_clusters(sb, block_group, gdp);
}

/*
 * The free blocks are managed by bitmaps. A file system contains several
 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block. Each descriptor contains the number of the bitmap block and
 * the free blocks count in the block. The descriptors are loaded in memory
 * when a file system is mounted (see ext4_fill_super).
 */

/**
 * ext4_get_group_desc() -- load group descriptor from disk
 * @sb:          super block
 * @block_group: given block group
 * @bh:          pointer to the buffer head to store the block
 *               group descriptor
 */
struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
                                             ext4_group_t block_group,
                                             struct buffer_head **bh)
{
    unsigned int group_desc;
    unsigned int offset;
    ext4_group_t ngroups = ext4_get_groups_count(sb);
    struct ext4_group_desc *desc;
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    if (block_group >= ngroups) {
        ext4_error(sb, "block_group >= groups_count - block_group = %u,"
                   " groups_count = %u", block_group, ngroups);

        return NULL;
    }

    group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
    offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
    if (!sbi->s_group_desc[group_desc]) {
        ext4_error(sb, "Group descriptor not loaded - "
                   "block_group = %u, group_desc = %u, desc = %u",
                   block_group, group_desc, offset);
        return NULL;
    }

    desc = (struct ext4_group_desc *)(
        (__u8 *)sbi->s_group_desc[group_desc]->b_data +
        offset * EXT4_DESC_SIZE(sb));
    if (bh)
        *bh = sbi->s_group_desc[group_desc];
    return desc;
}

/*
 * Return the block number which was discovered to be invalid, or 0 if
 * the block bitmap is valid.
 */
static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
                                            struct ext4_group_desc *desc,
                                            ext4_group_t block_group,
                                            struct buffer_head *bh)
{
    struct ext4_sb_info *sbi = EXT4_SB(sb);
    ext4_grpblk_t offset;
    ext4_grpblk_t next_zero_bit;
    ext4_fsblk_t blk;
    ext4_fsblk_t group_first_block;

    if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
        /* with FLEX_BG, the inode/block bitmaps and itable
         * blocks may not be in the group at all
         * so the bitmap validation will be skipped for those groups
         * or it has to also read the block group where the bitmaps
         * are located to verify they are set.
         */
        return 0;
    }
    group_first_block = ext4_group_first_block_no(sb, block_group);

    /* check whether block bitmap block number is set */
    blk = ext4_block_bitmap(sb, desc);
    offset = blk - group_first_block;
    if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
        /* bad block bitmap */
        return blk;

    /* check whether the inode bitmap block number is set */
    blk = ext4_inode_bitmap(sb, desc);
    offset = blk - group_first_block;
    if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
        /* bad block bitmap */
        return blk;

    /* check whether the inode table block number is set */
    blk = ext4_inode_table(sb, desc);
    offset = blk - group_first_block;
    next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
            EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group),
            EXT4_B2C(sbi, offset));
    if (next_zero_bit <
        EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group))
        /* bad bitmap for inode tables */
        return blk;
    return 0;
}

static void ext4_validate_block_bitmap(struct super_block *sb,
                                       struct ext4_group_desc *desc,
                                       ext4_group_t block_group,
                                       struct buffer_head *bh)
{
    ext4_fsblk_t blk;
    struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    if (buffer_verified(bh))
        return;

    ext4_lock_group(sb, block_group);
    blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
    if (unlikely(blk != 0)) {
        ext4_unlock_group(sb, block_group);
        ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
                   block_group, blk);
        if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
            percpu_counter_sub(&sbi->s_freeclusters_counter,
                               grp->bb_free);
        set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
        return;
    }
    if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
                                                desc, bh))) {
        ext4_unlock_group(sb, block_group);
        ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
        if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
            percpu_counter_sub(&sbi->s_freeclusters_counter,
                               grp->bb_free);
        set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
        return;
    }
    set_buffer_verified(bh);
    ext4_unlock_group(sb, block_group);
}

/**
 * ext4_read_block_bitmap_nowait()
 * @sb:          super block
 * @block_group: given block group
 *
 * Read the bitmap for a given block_group, and validate the
 * bits for block/inode/inode tables are set in the bitmaps
 *
 * Return buffer_head on success or NULL in case of failure.
 */
struct buffer_head *
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
{
    struct ext4_group_desc *desc;
    struct buffer_head *bh;
    ext4_fsblk_t bitmap_blk;

    desc = ext4_get_group_desc(sb, block_group, NULL);
    if (!desc)
        return NULL;
    bitmap_blk = ext4_block_bitmap(sb, desc);
    bh = sb_getblk(sb, bitmap_blk);
    if (unlikely(!bh)) {
        ext4_error(sb, "Cannot get buffer for block bitmap - "
                   "block_group = %u, block_bitmap = %llu",
                   block_group, bitmap_blk);
        return NULL;
    }

    if (bitmap_uptodate(bh))
        goto verify;

    lock_buffer(bh);
    if (bitmap_uptodate(bh)) {
        unlock_buffer(bh);
        goto verify;
    }
    ext4_lock_group(sb, block_group);
    if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
        int err;

        err = ext4_init_block_bitmap(sb, bh, block_group, desc);
        set_bitmap_uptodate(bh);
        set_buffer_uptodate(bh);
        ext4_unlock_group(sb, block_group);
        unlock_buffer(bh);
        if (err)
            ext4_error(sb, "Checksum bad for grp %u", block_group);
        return bh;
    }
    ext4_unlock_group(sb, block_group);
    if (buffer_uptodate(bh)) {
        /*
         * if not uninit if bh is uptodate,
         * bitmap is also uptodate
         */
        set_bitmap_uptodate(bh);
        unlock_buffer(bh);
        goto verify;
    }
    /*
     * submit the buffer_head for reading
     */
    set_buffer_new(bh);
    trace_ext4_read_block_bitmap_load(sb, block_group);
    bh->b_end_io = ext4_end_bitmap_read;
    get_bh(bh);
    submit_bh(READ | REQ_META | REQ_PRIO, bh);
    return bh;
verify:
    ext4_validate_block_bitmap(sb, desc, block_group, bh);
    if (buffer_verified(bh))
        return bh;
    put_bh(bh);
    return NULL;
}

/* Returns 0 on success, 1 on error */
int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
                           struct buffer_head *bh)
{
    struct ext4_group_desc *desc;

    if (!buffer_new(bh))
        return 0;
    desc = ext4_get_group_desc(sb, block_group, NULL);
    if (!desc)
        return 1;
    wait_on_buffer(bh);
    if (!buffer_uptodate(bh)) {
        ext4_error(sb, "Cannot read block bitmap - "
                   "block_group = %u, block_bitmap = %llu",
                   block_group, (unsigned long long) bh->b_blocknr);
        return 1;
    }
    clear_buffer_new(bh);
    /* Panic or remount fs read-only if block bitmap is invalid */
    ext4_validate_block_bitmap(sb, desc, block_group, bh);
    /* ...but check for error just in case errors=continue. */
    return !buffer_verified(bh);
}

struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
    struct buffer_head *bh;

    bh = ext4_read_block_bitmap_nowait(sb, block_group);
    if (!bh)
        return NULL;
    if (ext4_wait_block_bitmap(sb, block_group, bh)) {
        put_bh(bh);
        return NULL;
    }
    return bh;
}

/**
 * ext4_has_free_clusters()
 * @sbi:       in-core super block structure.
 * @nclusters: number of needed blocks
 * @flags:     flags from ext4_mb_new_blocks()
 *
 * Check if filesystem has nclusters free & available for allocation.
 * On success return 1, return 0 on failure.
 */
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
                                  s64 nclusters, unsigned int flags)
{
    s64 free_clusters, dirty_clusters, rsv, resv_clusters;
    struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
    struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;

    free_clusters = percpu_counter_read_positive(fcc);
    dirty_clusters = percpu_counter_read_positive(dcc);
    resv_clusters = atomic64_read(&sbi->s_resv_clusters);

    /*
     * r_blocks_count should always be a multiple of the cluster ratio so
     * we are safe to do a plain bit shift only.
     */
    rsv = (atomic64_read(&sbi->s_r_blocks_count) >> sbi->s_cluster_bits) +
          resv_clusters;

    if (free_clusters - (nclusters + rsv + dirty_clusters) <
        EXT4_FREECLUSTERS_WATERMARK) {
        free_clusters = percpu_counter_sum_positive(fcc);
        dirty_clusters = percpu_counter_sum_positive(dcc);
    }
    /* Check whether we have space after accounting for current
     * dirty clusters & root reserved clusters.
     */
    if (free_clusters >= (rsv + nclusters + dirty_clusters))
        return 1;

    /* Hm, nope. Are (enough) root reserved clusters available? */
    if (uid_eq(sbi->s_resuid, current_fsuid()) ||
        (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
        capable(CAP_SYS_RESOURCE) ||
        (flags & EXT4_MB_USE_ROOT_BLOCKS)) {

        if (free_clusters >= (nclusters + dirty_clusters +
                              resv_clusters))
            return 1;
    }
    /* No free blocks. Let's see if we can dip into reserved pool */
    if (flags & EXT4_MB_USE_RESERVED) {
        if (free_clusters >= (nclusters + dirty_clusters))
            return 1;
    }

    return 0;
}

int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
                             s64 nclusters, unsigned int flags)
{
    if (ext4_has_free_clusters(sbi, nclusters, flags)) {
        percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
        return 0;
    } else
        return -ENOSPC;
}

/**
 * ext4_should_retry_alloc()
 * @sb:      super block
 * @retries: number of attempts that have been made
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
 * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * if the total number of retries exceeds three times, return FALSE.
 */
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
    if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
        (*retries)++ > 3 ||
        !EXT4_SB(sb)->s_journal)
        return 0;

    jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

    return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}

/*
 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
 *
 * @handle: handle to this transaction
 * @inode:  file inode
 * @goal:   given target block (filesystem wide)
 * @count:  pointer to total number of clusters needed
 * @errp:   error code
 *
 * Return the first allocated block number on success; *count stores the
 * total number of blocks allocated, and any error is stored in *errp.
 */
ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                                  ext4_fsblk_t goal, unsigned int flags,
                                  unsigned long *count, int *errp)
{
    struct ext4_allocation_request ar;
    ext4_fsblk_t ret;

    memset(&ar, 0, sizeof(ar));
    /* Fill with neighbour allocated blocks */
    ar.inode = inode;
    ar.goal = goal;
    ar.len = count ? *count : 1;
    ar.flags = flags;

    ret = ext4_mb_new_blocks(handle, &ar, errp);
    if (count)
        *count = ar.len;
    /*
     * Account for the allocated meta blocks.  We will never
     * fail EDQUOT for metadata, but we do account for it.
     */
    if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        dquot_alloc_block_nofail(inode,
                EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
    }
    return ret;
}

/**
 * ext4_count_free_clusters() -- count filesystem free clusters
 * @sb: superblock
 *
 * Adds up the number of free clusters from each block group.
 */
ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
{
    ext4_fsblk_t desc_count;
    struct ext4_group_desc *gdp;
    ext4_group_t i;
    ext4_group_t ngroups = ext4_get_groups_count(sb);
    struct ext4_group_info *grp;
#ifdef EXT4FS_DEBUG
    struct ext4_super_block *es;
    ext4_fsblk_t bitmap_count;
    unsigned int x;
    struct buffer_head *bitmap_bh = NULL;

    es = EXT4_SB(sb)->s_es;
    desc_count = 0;
    bitmap_count = 0;
    gdp = NULL;

    for (i = 0; i < ngroups; i++) {
        gdp = ext4_get_group_desc(sb, i, NULL);
        if (!gdp)
            continue;
        grp = NULL;
        if (EXT4_SB(sb)->s_group_info)
            grp = ext4_get_group_info(sb, i);
        if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
            desc_count += ext4_free_group_clusters(sb, gdp);
        brelse(bitmap_bh);
        bitmap_bh = ext4_read_block_bitmap(sb, i);
        if (bitmap_bh == NULL)
            continue;

        x = ext4_count_free(bitmap_bh->b_data,
                            EXT4_CLUSTERS_PER_GROUP(sb) / 8);
        printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
               i, ext4_free_group_clusters(sb, gdp), x);
        bitmap_count += x;
    }
    brelse(bitmap_bh);
    printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
           ", computed = %llu, %llu\n",
           EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
           desc_count, bitmap_count);
    return bitmap_count;
#else
    desc_count = 0;
    for (i = 0; i < ngroups; i++) {
        gdp = ext4_get_group_desc(sb, i, NULL);
        if (!gdp)
            continue;
        grp = NULL;
        if (EXT4_SB(sb)->s_group_info)
            grp = ext4_get_group_info(sb, i);
        if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
            desc_count += ext4_free_group_clusters(sb, gdp);
    }

    return desc_count;
#endif
}

static inline int test_root(ext4_group_t a, int b)
{
    while (1) {
        if (a < b)
            return 0;
        if (a == b)
            return 1;
        if ((a % b) != 0)
            return 0;
        a = a / b;
    }
}

/**
 * ext4_bg_has_super - number of blocks used by the superblock in group
 * @sb:    superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the superblock (primary or backup)
 * in this group.  Currently this will be only 0 or 1.
 */
int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
{
    struct ext4_super_block *es = EXT4_SB(sb)->s_es;

    if (group == 0)
        return 1;
    if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) {
        if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
            group == le32_to_cpu(es->s_backup_bgs[1]))
            return 1;
        return 0;
    }
    if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb,
                EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER))
        return 1;
    if (!(group & 1))
        return 0;
    if (test_root(group, 3) || (test_root(group, 5)) ||
        test_root(group, 7))
        return 1;

    return 0;
}

static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
                                          ext4_group_t group)
{
    unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
    ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
    ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;

    if (group == first || group == first + 1 || group == last)
        return 1;
    return 0;
}

static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
                                            ext4_group_t group)
{
    if (!ext4_bg_has_super(sb, group))
        return 0;

    if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG))
        return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
    else
        return EXT4_SB(sb)->s_gdb_count;
}

/**
 * ext4_bg_num_gdb - number of blocks used by the group table in group
 * @sb:    superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the group descriptor table
 * (primary or backup) in this group.  In the future there may be a
 * different number of descriptor blocks in each group.
 */
unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
{
    unsigned long first_meta_bg =
        le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
    unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);

    if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
        metagroup < first_meta_bg)
        return ext4_bg_num_gdb_nometa(sb, group);

    return ext4_bg_num_gdb_meta(sb, group);
}

/*
 * This function returns the number of file system metadata clusters at
 * the beginning of a block group, including the reserved gdt blocks.
 */
static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
                                            ext4_group_t block_group)
{
    struct ext4_sb_info *sbi = EXT4_SB(sb);
    unsigned num;

    /* Check for superblock and gdt backups in this group */
    num = ext4_bg_has_super(sb, block_group);

    if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
        block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
            sbi->s_desc_per_block) {
        if (num) {
            num += ext4_bg_num_gdb(sb, block_group);
            num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
        }
    } else { /* For META_BG_BLOCK_GROUPS */
        num += ext4_bg_num_gdb(sb, block_group);
    }
    return EXT4_NUM_B2C(sbi, num);
}

/**
 * ext4_inode_to_goal_block - return a hint for block allocation
 * @inode: inode for block allocation
 *
 * Return the ideal location to start allocating blocks for a
 * newly created inode.
 */
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
    struct ext4_inode_info *ei = EXT4_I(inode);
    ext4_group_t block_group;
    ext4_grpblk_t colour;
    int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
    ext4_fsblk_t bg_start;
    ext4_fsblk_t last_block;

    block_group = ei->i_block_group;
    if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
        /*
         * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
         * block groups per flexgroup, reserve the first block
         * group for directories and special files.  Regular
         * files will start at the second block group.  This
         * tends to speed up directory access and improves
         * fsck times.
         */
        block_group &= ~(flex_size-1);
        if (S_ISREG(inode->i_mode))
            block_group++;
    }
    bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
    last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

    /*
     * If we are doing delayed allocation, we don't need to take
     * colour into account.
     */
    if (test_opt(inode->i_sb, DELALLOC))
        return bg_start;

    if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
        colour = (current->pid % 16) *
            (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
    else
        colour = (current->pid % 16) * ((last_block - bg_start) / 16);
    return bg_start + colour;
}
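The sparse_super placement logic above (ext4_bg_has_super() via test_root()) puts backup superblocks in groups 0 and 1 and in groups whose numbers are powers of 3, 5, or 7. A userspace sketch that reuses the same loop to list those groups for a hypothetical 200-group filesystem (sparse_super assumed on, SPARSE_SUPER2 off):

    #include <stdio.h>

    typedef unsigned int group_t;

    /* Same loop as test_root() in balloc.c: is 'a' a power of 'b'? */
    static int test_root(group_t a, int b)
    {
        while (1) {
            if (a < b)
                return 0;
            if (a == b)
                return 1;
            if ((a % b) != 0)
                return 0;
            a = a / b;
        }
    }

    /* Mirrors the sparse_super branch of ext4_bg_has_super(). */
    static int bg_has_super(group_t group)
    {
        if (group <= 1)
            return 1;
        if (!(group & 1))
            return 0;           /* even groups never hold a backup */
        return test_root(group, 3) || test_root(group, 5) ||
               test_root(group, 7);
    }

    int main(void)
    {
        group_t g;

        /* Prints: 0 1 3 5 7 9 25 27 49 81 125 */
        for (g = 0; g < 200; g++)
            if (bg_has_super(g))
                printf("%u ", g);
        printf("\n");
        return 0;
    }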
fs/ext4/bitmap.c (new normal file, 98 lines)
@@ -0,0 +1,98 @@
/*
 * linux/fs/ext4/bitmap.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 */

#include <linux/buffer_head.h>
#include <linux/jbd2.h>
#include "ext4.h"

unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
{
    return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
}

int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
                                  struct ext4_group_desc *gdp,
                                  struct buffer_head *bh, int sz)
{
    __u32 hi;
    __u32 provided, calculated;
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    if (!ext4_has_metadata_csum(sb))
        return 1;

    provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
    calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
    if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
        hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi);
        provided |= (hi << 16);
    } else
        calculated &= 0xFFFF;

    return provided == calculated;
}

void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
                                struct ext4_group_desc *gdp,
                                struct buffer_head *bh, int sz)
{
    __u32 csum;
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    if (!ext4_has_metadata_csum(sb))
        return;

    csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
    gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
    if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
        gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}

int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
                                  struct ext4_group_desc *gdp,
                                  struct buffer_head *bh)
{
    __u32 hi;
    __u32 provided, calculated;
    struct ext4_sb_info *sbi = EXT4_SB(sb);
    int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;

    if (!ext4_has_metadata_csum(sb))
        return 1;

    provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
    calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
    if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) {
        hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi);
        provided |= (hi << 16);
    } else
        calculated &= 0xFFFF;

    if (provided == calculated)
        return 1;

    return 0;
}

void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
                                struct ext4_group_desc *gdp,
                                struct buffer_head *bh)
{
    int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
    __u32 csum;
    struct ext4_sb_info *sbi = EXT4_SB(sb);

    if (!ext4_has_metadata_csum(sb))
        return;

    csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
    gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
    if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END)
        gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}
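A detail worth noting in these helpers: the 32-bit crc32c is stored across two 16-bit group-descriptor fields, and when the descriptor is too small to carry the _hi field, verification compares only the low half. A minimal userspace sketch of that split/recombine (the checksum value below is a hypothetical constant, not a real crc32c):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Two 16-bit fields, as in bg_block_bitmap_csum_lo/hi; the _hi field
     * only exists on large (64-bit) group descriptors. */
    struct desc {
        uint16_t csum_lo;
        uint16_t csum_hi;
    };

    static void csum_set(struct desc *gdp, uint32_t csum, int has_hi)
    {
        gdp->csum_lo = csum & 0xFFFF;
        if (has_hi)
            gdp->csum_hi = csum >> 16;
    }

    static int csum_verify(const struct desc *gdp, uint32_t csum, int has_hi)
    {
        uint32_t provided = gdp->csum_lo;

        if (has_hi)
            provided |= (uint32_t)gdp->csum_hi << 16;
        else
            csum &= 0xFFFF;        /* only the low half was stored */
        return provided == csum;
    }

    int main(void)
    {
        struct desc d = { 0, 0 };
        uint32_t csum = 0xDEADBEEF;  /* hypothetical crc32c result */

        csum_set(&d, csum, 1);
        assert(csum_verify(&d, csum, 1));

        csum_set(&d, csum, 0);       /* small descriptor: low half only */
        assert(csum_verify(&d, csum, 0));

        printf("lo=0x%04X hi=0x%04X\n",
               (unsigned)d.csum_lo, (unsigned)d.csum_hi);
        return 0;
    }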
fs/ext4/block_validity.c (new normal file, 253 lines)
@@ -0,0 +1,253 @@
/*
 * linux/fs/ext4/block_validity.c
 *
 * Copyright (C) 2009
 * Theodore Ts'o (tytso@mit.edu)
 *
 * Track which blocks in the filesystem are metadata blocks that
 * should never be used as data blocks by files or directories.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include "ext4.h"

struct ext4_system_zone {
    struct rb_node node;
    ext4_fsblk_t start_blk;
    unsigned int count;
};

static struct kmem_cache *ext4_system_zone_cachep;

int __init ext4_init_system_zone(void)
{
    ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
    if (ext4_system_zone_cachep == NULL)
        return -ENOMEM;
    return 0;
}

void ext4_exit_system_zone(void)
{
    kmem_cache_destroy(ext4_system_zone_cachep);
}

static inline int can_merge(struct ext4_system_zone *entry1,
                            struct ext4_system_zone *entry2)
{
    if ((entry1->start_blk + entry1->count) == entry2->start_blk)
        return 1;
    return 0;
}

/*
 * Mark a range of blocks as belonging to the "system zone" --- that
 * is, filesystem metadata blocks which should never be used by
 * inodes.
 */
static int add_system_zone(struct ext4_sb_info *sbi,
                           ext4_fsblk_t start_blk,
                           unsigned int count)
{
    struct ext4_system_zone *new_entry = NULL, *entry;
    struct rb_node **n = &sbi->system_blks.rb_node, *node;
    struct rb_node *parent = NULL, *new_node = NULL;

    while (*n) {
        parent = *n;
        entry = rb_entry(parent, struct ext4_system_zone, node);
        if (start_blk < entry->start_blk)
            n = &(*n)->rb_left;
        else if (start_blk >= (entry->start_blk + entry->count))
            n = &(*n)->rb_right;
        else {
            if (start_blk + count > (entry->start_blk +
                                     entry->count))
                entry->count = (start_blk + count -
                                entry->start_blk);
            new_node = *n;
            new_entry = rb_entry(new_node, struct ext4_system_zone,
                                 node);
            break;
        }
    }

    if (!new_entry) {
        new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
                                     GFP_KERNEL);
        if (!new_entry)
            return -ENOMEM;
        new_entry->start_blk = start_blk;
        new_entry->count = count;
        new_node = &new_entry->node;

        rb_link_node(new_node, parent, n);
        rb_insert_color(new_node, &sbi->system_blks);
    }

    /* Can we merge to the left? */
    node = rb_prev(new_node);
    if (node) {
        entry = rb_entry(node, struct ext4_system_zone, node);
        if (can_merge(entry, new_entry)) {
            new_entry->start_blk = entry->start_blk;
            new_entry->count += entry->count;
            rb_erase(node, &sbi->system_blks);
            kmem_cache_free(ext4_system_zone_cachep, entry);
        }
    }

    /* Can we merge to the right? */
    node = rb_next(new_node);
    if (node) {
        entry = rb_entry(node, struct ext4_system_zone, node);
        if (can_merge(new_entry, entry)) {
            new_entry->count += entry->count;
            rb_erase(node, &sbi->system_blks);
            kmem_cache_free(ext4_system_zone_cachep, entry);
        }
    }
    return 0;
}

static void debug_print_tree(struct ext4_sb_info *sbi)
{
    struct rb_node *node;
    struct ext4_system_zone *entry;
    int first = 1;

    printk(KERN_INFO "System zones: ");
    node = rb_first(&sbi->system_blks);
    while (node) {
        entry = rb_entry(node, struct ext4_system_zone, node);
        printk("%s%llu-%llu", first ? "" : ", ",
               entry->start_blk, entry->start_blk + entry->count - 1);
        first = 0;
        node = rb_next(node);
    }
    printk("\n");
}

#ifdef VERIFY_META_ONLY
struct rb_root *ext4_system_zone_root(struct super_block *sb)
{
    return &EXT4_SB(sb)->system_blks;
}
#endif
int ext4_setup_system_zone(struct super_block *sb)
{
    ext4_group_t ngroups = ext4_get_groups_count(sb);
    struct ext4_sb_info *sbi = EXT4_SB(sb);
    struct ext4_group_desc *gdp;
    ext4_group_t i;
    int flex_size = ext4_flex_bg_size(sbi);
    int ret;

    if (!test_opt(sb, BLOCK_VALIDITY)) {
        if (EXT4_SB(sb)->system_blks.rb_node)
            ext4_release_system_zone(sb);
        return 0;
    }
    if (EXT4_SB(sb)->system_blks.rb_node)
        return 0;

    for (i = 0; i < ngroups; i++) {
        if (ext4_bg_has_super(sb, i) &&
            ((i < 5) || ((i % flex_size) == 0)))
            add_system_zone(sbi, ext4_group_first_block_no(sb, i),
                            ext4_bg_num_gdb(sb, i) + 1);
        gdp = ext4_get_group_desc(sb, i, NULL);
        ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
        if (ret)
            return ret;
        ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
        if (ret)
            return ret;
        ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
                              sbi->s_itb_per_group);
        if (ret)
            return ret;
    }

    if (test_opt(sb, DEBUG))
        debug_print_tree(EXT4_SB(sb));
    return 0;
}

/* Called when the filesystem is unmounted */
void ext4_release_system_zone(struct super_block *sb)
{
    struct ext4_system_zone *entry, *n;

    rbtree_postorder_for_each_entry_safe(entry, n,
                                         &EXT4_SB(sb)->system_blks, node)
        kmem_cache_free(ext4_system_zone_cachep, entry);

    EXT4_SB(sb)->system_blks = RB_ROOT;
}

/*
 * Returns 1 if the passed-in block region (start_blk,
 * start_blk+count) is valid; 0 if some part of the block region
 * overlaps with filesystem metadata blocks.
 */
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
                          unsigned int count)
{
#ifndef VERIFY_META_ONLY
    struct ext4_system_zone *entry;
    struct rb_node *n = sbi->system_blks.rb_node;
#endif

    if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
        (start_blk + count < start_blk) ||
        (start_blk + count > ext4_blocks_count(sbi->s_es))) {
        sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
        return 0;
    }
#ifndef VERIFY_META_ONLY
    while (n) {
        entry = rb_entry(n, struct ext4_system_zone, node);
        if (start_blk + count - 1 < entry->start_blk)
            n = n->rb_left;
        else if (start_blk >= (entry->start_blk + entry->count))
            n = n->rb_right;
        else {
            sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
            return 0;
        }
    }
#endif
    return 1;
}

int ext4_check_blockref(const char *function, unsigned int line,
                        struct inode *inode, __le32 *p, unsigned int max)
{
    struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
    __le32 *bref = p;
    unsigned int blk;

    while (bref < p+max) {
        blk = le32_to_cpu(*bref++);
        if (blk &&
            unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                            blk, 1))) {
            es->s_last_error_block = cpu_to_le64(blk);
            ext4_error_inode(inode, function, line, blk,
                             "invalid block");
            return -EIO;
        }
    }
    return 0;
}
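The core of ext4_data_block_valid() is an interval test: the request [start_blk, start_blk + count) is rejected if it wraps, falls outside the filesystem, or overlaps any system zone. A minimal userspace sketch of the same test over a sorted array instead of the kernel's rb-tree (the zone layout below is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t fsblk_t;

    struct zone {
        fsblk_t start;
        unsigned int count;
    };

    /* Same overlap test as the rb-tree walk in ext4_data_block_valid(),
     * over a sorted, non-overlapping array of zones.  Returns 1 if the
     * range [start, start + count) touches no metadata zone. */
    static int range_valid(const struct zone *z, int nzones,
                           fsblk_t start, unsigned int count)
    {
        int i;

        if (start + count < start)       /* wraparound, as in the kernel check */
            return 0;
        for (i = 0; i < nzones; i++) {
            if (start + count - 1 < z[i].start)
                break;                   /* sorted: nothing later can overlap */
            if (start >= z[i].start + z[i].count)
                continue;
            return 0;                    /* overlaps metadata */
        }
        return 1;
    }

    int main(void)
    {
        /* Hypothetical zones: superblock/GDT at 0..15, inode table at 1024..1535. */
        struct zone zones[] = { { 0, 16 }, { 1024, 512 } };

        printf("%d\n", range_valid(zones, 2, 16, 100));    /* 1: between zones */
        printf("%d\n", range_valid(zones, 2, 1000, 100));  /* 0: hits inode table */
        return 0;
    }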
611
fs/ext4/dir.c
Normal file
611
fs/ext4/dir.c
Normal file
|
@ -0,0 +1,611 @@
|
|||
/*
 *  linux/fs/ext4/dir.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/dir.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 directory handling functions
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *
 * Hash Tree Directory indexing (c) 2001  Daniel Phillips
 *
 */

#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include "ext4.h"
#include "xattr.h"

static int ext4_dx_readdir(struct file *, struct dir_context *);

/**
 * Check if the given dir-inode refers to an htree-indexed directory
 * (or a directory which could potentially get converted to use htree
 * indexing).
 *
 * Return 1 if it is a dx dir, 0 if not
 */
static int is_dx_dir(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
	     ((inode->i_size >> sb->s_blocksize_bits) == 1) ||
	     ext4_has_inline_data(inode)))
		return 1;

	return 0;
}

/*
 * Return 0 if the directory entry is OK, and 1 if there is a problem
 *
 * Note: this is the opposite of what ext2 and ext3 historically returned...
 *
 * bh passed here can be an inode block or a dir data block, depending
 * on the inode inline data flag.
 */
int __ext4_check_dir_entry(const char *function, unsigned int line,
			   struct inode *dir, struct file *filp,
			   struct ext4_dir_entry_2 *de,
			   struct buffer_head *bh, char *buf, int size,
			   unsigned int offset)
{
	const char *error_msg = NULL;
	const int rlen = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);

	if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
		error_msg = "rec_len is smaller than minimal";
	else if (unlikely(rlen % 4 != 0))
		error_msg = "rec_len % 4 != 0";
	else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
		error_msg = "rec_len is too small for name_len";
	else if (unlikely(((char *) de - buf) + rlen > size))
		error_msg = "directory entry across range";
	else if (unlikely(le32_to_cpu(de->inode) >
			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
		error_msg = "inode out of bounds";
	else
		return 0;

	/* for debugging, sangwoo2.lee */
	print_bh(dir->i_sb, bh, 0, EXT4_BLOCK_SIZE(dir->i_sb));
	/* for debugging */

	if (filp)
		ext4_error_file(filp, function, line, bh->b_blocknr,
				"bad entry in directory: %s - offset=%u(%u), "
				"inode=%u, rec_len=%d, name_len=%d",
				error_msg, (unsigned) (offset % size),
				offset, le32_to_cpu(de->inode),
				rlen, de->name_len);
	else
		ext4_error_inode(dir, function, line, bh->b_blocknr,
				"bad entry in directory: %s - offset=%u(%u), "
				"inode=%u, rec_len=%d, name_len=%d",
				error_msg, (unsigned) (offset % size),
				offset, le32_to_cpu(de->inode),
				rlen, de->name_len);

	return 1;
}
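/*
 * Editor's worked example (not in the original file): with the usual
 * mainline layout EXT4_DIR_REC_LEN(name_len) = (name_len + 8 + 3) & ~3,
 * a one-character entry needs at least 12 bytes.  So a rec_len of 8
 * trips "rec_len is smaller than minimal" above, while a rec_len of 14
 * passes that check but trips "rec_len % 4 != 0".
 */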

static int ext4_readdir(struct file *file, struct dir_context *ctx)
{
	unsigned int offset;
	int i;
	struct ext4_dir_entry_2 *de;
	int err;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	int dir_has_error = 0;

	if (is_dx_dir(inode)) {
		err = ext4_dx_readdir(file, ctx);
		if (err != ERR_BAD_DX_DIR) {
			return err;
		}
		/*
		 * We don't set the inode dirty flag since it's not
		 * critical that it get flushed back to the disk.
		 */
		ext4_clear_inode_flag(file_inode(file),
				      EXT4_INODE_INDEX);
	}

	if (ext4_has_inline_data(inode)) {
		int has_inline_data = 1;
		int ret = ext4_read_inline_dir(file, ctx,
					   &has_inline_data);
		if (has_inline_data)
			return ret;
	}

	offset = ctx->pos & (sb->s_blocksize - 1);

	while (ctx->pos < inode->i_size) {
		struct ext4_map_blocks map;
		struct buffer_head *bh = NULL;

		map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
		map.m_len = 1;
		err = ext4_map_blocks(NULL, inode, &map, 0);
		if (err > 0) {
			pgoff_t index = map.m_pblk >>
					(PAGE_CACHE_SHIFT - inode->i_blkbits);
			if (!ra_has_index(&file->f_ra, index))
				page_cache_sync_readahead(
					sb->s_bdev->bd_inode->i_mapping,
					&file->f_ra, file,
					index, 1);
			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
			bh = ext4_bread(NULL, inode, map.m_lblk, 0);
			if (IS_ERR(bh))
				return PTR_ERR(bh);
		}

		if (!bh) {
			if (!dir_has_error) {
				EXT4_ERROR_FILE(file, 0,
						"directory contains a "
						"hole at offset %llu",
					   (unsigned long long) ctx->pos);
				dir_has_error = 1;
			}
			/* corrupt size?  Maybe no more blocks to read */
			if (ctx->pos > inode->i_blocks << 9)
				break;
			ctx->pos += sb->s_blocksize - offset;
			continue;
		}

		/* Check the checksum */
		if (!buffer_verified(bh) &&
		    !ext4_dirent_csum_verify(inode,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_FILE(file, 0, "directory fails checksum "
					"at offset %llu",
					(unsigned long long)ctx->pos);
			ctx->pos += sb->s_blocksize - offset;
			brelse(bh);
			continue;
		}
		set_buffer_verified(bh);

		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now.  Scan from the start of the block
		 * to make sure. */
		if (file->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ext4_dir_entry_2 *)
					(bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero.  A
				 * failure will be detected in the
				 * dirent test below. */
				if (ext4_rec_len_from_disk(de->rec_len,
					sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
					break;
				i += ext4_rec_len_from_disk(de->rec_len,
							    sb->s_blocksize);
			}
			offset = i;
			ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
				| offset;
			file->f_version = inode->i_version;
		}

		while (ctx->pos < inode->i_size
		       && offset < sb->s_blocksize) {
			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
			if (ext4_check_dir_entry(inode, file, de, bh,
						 bh->b_data, bh->b_size,
						 offset)) {
				/*
				 * On error, skip to the next block
				 */
				ctx->pos = (ctx->pos |
						(sb->s_blocksize - 1)) + 1;
				break;
			}
			offset += ext4_rec_len_from_disk(de->rec_len,
					sb->s_blocksize);
			if (le32_to_cpu(de->inode)) {
				if (!dir_emit(ctx, de->name,
						de->name_len,
						le32_to_cpu(de->inode),
						get_dtype(sb, de->file_type))) {
					brelse(bh);
					return 0;
				}
			}
			ctx->pos += ext4_rec_len_from_disk(de->rec_len,
						sb->s_blocksize);
		}
		offset = 0;
		brelse(bh);
		if (ctx->pos < inode->i_size) {
			if (!dir_relax(inode))
				return 0;
		}
	}
	return 0;
}

static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
	return is_compat_task();
#else
	return (BITS_PER_LONG == 32);
#endif
}

/*
 * These functions convert from the major/minor hash to an f_pos
 * value for dx directories
 *
 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
 * FMODE_64BITHASH explicitly.  On the other hand, we allow ext4 to be
 * mounted directly on both 32-bit and 64-bit nodes, in which case
 * neither FMODE_32BITHASH nor FMODE_64BITHASH is specified.
 */
static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return major >> 1;
	else
		return ((__u64)(major >> 1) << 32) | (__u64)minor;
}

static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return (pos << 1) & 0xffffffff;
	else
		return ((pos >> 32) << 1) & 0xffffffff;
}

static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return 0;
	else
		return pos & 0xffffffff;
}

/*
 * Return 32- or 64-bit end-of-file for dx directories
 */
static inline loff_t ext4_get_htree_eof(struct file *filp)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return EXT4_HTREE_EOF_32BIT;
	else
		return EXT4_HTREE_EOF_64BIT;
}
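/*
 * Editor's worked example (values illustrative, not in the original
 * file): for a 64-bit, non-FMODE_32BITHASH reader, major hash
 * 0x8000000c with minor hash 0x5 encodes as
 *
 *	pos = ((0x8000000c >> 1) << 32) | 0x5 = 0x4000000600000005
 *
 * and the decoders above invert it exactly:
 *	pos2maj_hash: ((pos >> 32) << 1) & 0xffffffff = 0x8000000c
 *	pos2min_hash: pos & 0xffffffff = 0x5
 * The low bit of the major hash is sacrificed, which is harmless since
 * the dx hash functions produce hashes with that bit already clear.
 */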

/*
 * ext4_dir_llseek() calls generic_file_llseek_size to handle htree
 * directories, where the "offset" is in terms of the filename hash
 * value instead of the byte offset.
 *
 * Because we may return a 64-bit hash that is well beyond offset limits,
 * we need to pass the max hash as the maximum allowable offset in
 * the htree directory case.
 *
 * For non-htree, ext4_llseek already chooses the proper max offset.
 */
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int dx_dir = is_dx_dir(inode);
	loff_t htree_max = ext4_get_htree_eof(file);

	if (likely(dx_dir))
		return generic_file_llseek_size(file, offset, whence,
						    htree_max, htree_max);
	else
		return ext4_llseek(file, offset, whence);
}

/*
 * This structure holds the nodes of the red-black tree used to store
 * the directory entry in hash order.
 */
struct fname {
	__u32		hash;
	__u32		minor_hash;
	struct rb_node	rb_hash;
	struct fname	*next;
	__u32		inode;
	__u8		name_len;
	__u8		file_type;
	char		name[0];
};

/*
 * This function implements a non-recursive way of freeing all of the
 * nodes in the red-black tree.
 */
static void free_rb_tree_fname(struct rb_root *root)
{
	struct fname *fname, *next;

	rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
		while (fname) {
			struct fname *old = fname;
			fname = fname->next;
			kfree(old);
		}

	*root = RB_ROOT;
}


static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
							   loff_t pos)
{
	struct dir_private_info *p;

	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
	if (!p)
		return NULL;
	p->curr_hash = pos2maj_hash(filp, pos);
	p->curr_minor_hash = pos2min_hash(filp, pos);
	return p;
}

void ext4_htree_free_dir_info(struct dir_private_info *p)
{
	free_rb_tree_fname(&p->root);
	kfree(p);
}

/*
 * Given a directory entry, enter it into the fname rb tree.
 */
int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
			    __u32 minor_hash,
			    struct ext4_dir_entry_2 *dirent)
{
	struct rb_node **p, *parent = NULL;
	struct fname *fname, *new_fn;
	struct dir_private_info *info;
	int len;

	info = dir_file->private_data;
	p = &info->root.rb_node;

	/* Create and allocate the fname structure */
	len = sizeof(struct fname) + dirent->name_len + 1;
	new_fn = kzalloc(len, GFP_KERNEL);
	if (!new_fn)
		return -ENOMEM;
	new_fn->hash = hash;
	new_fn->minor_hash = minor_hash;
	new_fn->inode = le32_to_cpu(dirent->inode);
	new_fn->name_len = dirent->name_len;
	new_fn->file_type = dirent->file_type;
	memcpy(new_fn->name, dirent->name, dirent->name_len);
	new_fn->name[dirent->name_len] = 0;

	while (*p) {
		parent = *p;
		fname = rb_entry(parent, struct fname, rb_hash);

		/*
		 * If the hash and minor hash match up, then we put
		 * them on a linked list.  This rarely happens...
		 */
		if ((new_fn->hash == fname->hash) &&
		    (new_fn->minor_hash == fname->minor_hash)) {
			new_fn->next = fname->next;
			fname->next = new_fn;
			return 0;
		}

		if (new_fn->hash < fname->hash)
			p = &(*p)->rb_left;
		else if (new_fn->hash > fname->hash)
			p = &(*p)->rb_right;
		else if (new_fn->minor_hash < fname->minor_hash)
			p = &(*p)->rb_left;
		else /* if (new_fn->minor_hash > fname->minor_hash) */
			p = &(*p)->rb_right;
	}

	rb_link_node(&new_fn->rb_hash, parent, p);
	rb_insert_color(&new_fn->rb_hash, &info->root);
	return 0;
}
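/*
 * Editor's note (not in the original file): entries that collide on
 * both hash and minor_hash are chained through fname->next rather than
 * inserted as duplicate tree keys, which is why free_rb_tree_fname()
 * above walks each node's linked list as well as the tree itself.
 */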


/*
 * This is a helper function for ext4_dx_readdir.  It calls filldir
 * for all entries on the fname linked list.  (Normally there is only
 * one entry on the linked list, unless there are 62 bit hash collisions.)
 */
static int call_filldir(struct file *file, struct dir_context *ctx,
			struct fname *fname)
{
	struct dir_private_info *info = file->private_data;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;

	if (!fname) {
		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
			 "called with null fname?!?", __func__, __LINE__,
			 inode->i_ino, current->comm);
		return 0;
	}
	ctx->pos = hash2pos(file, fname->hash, fname->minor_hash);
	while (fname) {
		if (!dir_emit(ctx, fname->name,
				fname->name_len,
				fname->inode,
				get_dtype(sb, fname->file_type))) {
			info->extra_fname = fname;
			return 1;
		}
		fname = fname->next;
	}
	return 0;
}

static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
{
	struct dir_private_info *info = file->private_data;
	struct inode *inode = file_inode(file);
	struct fname *fname;
	int ret;

	if (!info) {
		info = ext4_htree_create_dir_info(file, ctx->pos);
		if (!info)
			return -ENOMEM;
		file->private_data = info;
	}

	if (ctx->pos == ext4_get_htree_eof(file))
		return 0;	/* EOF */

	/* Someone has messed with f_pos; reset the world */
	if (info->last_pos != ctx->pos) {
		free_rb_tree_fname(&info->root);
		info->curr_node = NULL;
		info->extra_fname = NULL;
		info->curr_hash = pos2maj_hash(file, ctx->pos);
		info->curr_minor_hash = pos2min_hash(file, ctx->pos);
	}

	/*
	 * If there are any leftover names on the hash collision
	 * chain, return them first.
	 */
	if (info->extra_fname) {
		if (call_filldir(file, ctx, info->extra_fname))
			goto finished;
		info->extra_fname = NULL;
		goto next_node;
	} else if (!info->curr_node)
		info->curr_node = rb_first(&info->root);

	while (1) {
		/*
		 * Fill the rbtree if we have no more entries,
		 * or the inode has changed since we last read in the
		 * cached entries.
		 */
		if ((!info->curr_node) ||
		    (file->f_version != inode->i_version)) {
			info->curr_node = NULL;
			free_rb_tree_fname(&info->root);
			file->f_version = inode->i_version;
			ret = ext4_htree_fill_tree(file, info->curr_hash,
						   info->curr_minor_hash,
						   &info->next_hash);
			if (ret < 0)
				return ret;
			if (ret == 0) {
				ctx->pos = ext4_get_htree_eof(file);
				break;
			}
			info->curr_node = rb_first(&info->root);
		}

		fname = rb_entry(info->curr_node, struct fname, rb_hash);
		info->curr_hash = fname->hash;
		info->curr_minor_hash = fname->minor_hash;
		if (call_filldir(file, ctx, fname))
			break;
	next_node:
		info->curr_node = rb_next(info->curr_node);
		if (info->curr_node) {
			fname = rb_entry(info->curr_node, struct fname,
					 rb_hash);
			info->curr_hash = fname->hash;
			info->curr_minor_hash = fname->minor_hash;
		} else {
			if (info->next_hash == ~0) {
				ctx->pos = ext4_get_htree_eof(file);
				break;
			}
			info->curr_hash = info->next_hash;
			info->curr_minor_hash = 0;
		}
	}
finished:
	info->last_pos = ctx->pos;
	return 0;
}

static int ext4_release_dir(struct inode *inode, struct file *filp)
{
	if (filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
		      int buf_size)
{
	struct ext4_dir_entry_2 *de;
	int nlen, rlen;
	unsigned int offset = 0;
	char *top;

	de = (struct ext4_dir_entry_2 *)buf;
	top = buf + buf_size;
	while ((char *) de < top) {
		if (ext4_check_dir_entry(dir, NULL, de, bh,
					 buf, buf_size, offset))
			return -EIO;
		nlen = EXT4_DIR_REC_LEN(de->name_len);
		rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
		de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
		offset += rlen;
	}
	if ((char *) de > top)
		return -EIO;

	return 0;
}

const struct file_operations ext4_dir_operations = {
	.llseek		= ext4_dir_llseek,
	.read		= generic_read_dir,
	.iterate	= ext4_readdir,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.fsync		= ext4_sync_file,
	.release	= ext4_release_dir,
};
2863
fs/ext4/ext4.h
Normal file
File diff suppressed because it is too large

274
fs/ext4/ext4_extents.h
Normal file
@@ -0,0 +1,274 @@
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 */

#ifndef _EXT4_EXTENTS
#define _EXT4_EXTENTS

#include "ext4.h"

/*
 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
 * becomes very small, so index split, in-depth growing and
 * other hard changes happen much more often.
 * This is for debug purposes only.
 */
#define AGGRESSIVE_TEST_

/*
 * With EXTENTS_STATS defined, the number of blocks and extents
 * are collected in the truncate path. They'll be shown at
 * umount time.
 */
#define EXTENTS_STATS__

/*
 * If CHECK_BINSEARCH is defined, then the results of the binary search
 * will also be checked by linear search.
 */
#define CHECK_BINSEARCH__

/*
 * If EXT_STATS is defined then stats numbers are collected.
 * These numbers will be displayed at umount time.
 */
#define EXT_STATS_


/*
 * ext4_inode has i_block array (60 bytes total).
 * The first 12 bytes store ext4_extent_header;
 * the remainder stores an array of ext4_extent.
 * For non-inode extent blocks, ext4_extent_tail
 * follows the array.
 */

/*
 * This is the extent tail on-disk structure.
 * All other extent structures are 12 bytes long.  It turns out that
 * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
 * covers all valid ext4 block sizes.  Therefore, this tail structure can be
 * crammed into the end of the block without having to rebalance the tree.
 */
struct ext4_extent_tail {
	__le32	et_checksum;	/* crc32c(uuid+inum+extent_block) */
};

/*
 * This is the extent on-disk structure.
 * It's used at the bottom of the tree.
 */
struct ext4_extent {
	__le32	ee_block;	/* first logical block extent covers */
	__le16	ee_len;		/* number of blocks covered by extent */
	__le16	ee_start_hi;	/* high 16 bits of physical block */
	__le32	ee_start_lo;	/* low 32 bits of physical block */
};

/*
 * This is the index on-disk structure.
 * It's used at all the levels except the bottom.
 */
struct ext4_extent_idx {
	__le32	ei_block;	/* index covers logical blocks from 'block' */
	__le32	ei_leaf_lo;	/* pointer to the physical block of the next *
				 * level. leaf or next index could be there */
	__le16	ei_leaf_hi;	/* high 16 bits of physical block */
	__u16	ei_unused;
};

/*
 * Each block (leaves and indexes), even inode-stored, has a header.
 */
struct ext4_extent_header {
	__le16	eh_magic;	/* probably will support different formats */
	__le16	eh_entries;	/* number of valid entries */
	__le16	eh_max;		/* capacity of store in entries */
	__le16	eh_depth;	/* has tree real underlying blocks? */
	__le32	eh_generation;	/* generation of the tree */
};

#define EXT4_EXT_MAGIC		cpu_to_le16(0xf30a)

#define EXT4_EXTENT_TAIL_OFFSET(hdr) \
	(sizeof(struct ext4_extent_header) + \
	 (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max)))

static inline struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
	return (struct ext4_extent_tail *)(((void *)eh) +
					   EXT4_EXTENT_TAIL_OFFSET(eh));
}

/*
 * Array of ext4_ext_path contains path to some extent.
 * Creation/lookup routines use it for traversal/splitting/etc.
 * Truncate uses it to simulate recursive walking.
 */
struct ext4_ext_path {
	ext4_fsblk_t			p_block;
	__u16				p_depth;
	__u16				p_maxdepth;
	struct ext4_extent		*p_ext;
	struct ext4_extent_idx		*p_idx;
	struct ext4_extent_header	*p_hdr;
	struct buffer_head		*p_bh;
};

/*
 * structure for external API
 */

/*
 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
 * MSB of ee_len field in the extent datastructure to signify if this
 * particular extent is an initialized extent or an unwritten (i.e.
 * preallocated) one.
 * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an
 * unwritten extent.
 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
 * unwritten one. In other words, if MSB of ee_len is set, it is an
 * unwritten extent with only one special scenario when ee_len = 0x8000.
 * In this case we can not have an unwritten extent of zero length and
 * thus we make it as a special case of initialized extent with 0x8000 length.
 * This way we get better extent-to-group alignment for initialized extents.
 * Hence, the maximum number of blocks we can have in an *initialized*
 * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767).
 */
#define EXT_INIT_MAX_LEN	(1UL << 15)
#define EXT_UNWRITTEN_MAX_LEN	(EXT_INIT_MAX_LEN - 1)
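/*
 * Editor's worked example (not in the original file): ee_len = 0x0003
 * is an initialized 3-block extent; ee_len = 0x8003 (MSB set) is an
 * unwritten extent of 0x8003 - 0x8000 = 3 blocks; ee_len = 0x8000 is
 * the special case and is read back as an *initialized* extent of
 * 32768 blocks, matching ext4_ext_is_unwritten() and
 * ext4_ext_get_actual_len() below.
 */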

#define EXT_FIRST_EXTENT(__hdr__) \
	((struct ext4_extent *) (((char *) (__hdr__)) +		\
				 sizeof(struct ext4_extent_header)))
#define EXT_FIRST_INDEX(__hdr__) \
	((struct ext4_extent_idx *) (((char *) (__hdr__)) +	\
				     sizeof(struct ext4_extent_header)))
#define EXT_HAS_FREE_INDEX(__path__) \
	(le16_to_cpu((__path__)->p_hdr->eh_entries) \
				     < le16_to_cpu((__path__)->p_hdr->eh_max))
#define EXT_LAST_EXTENT(__hdr__) \
	(EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
#define EXT_LAST_INDEX(__hdr__) \
	(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
#define EXT_MAX_EXTENT(__hdr__) \
	(EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
#define EXT_MAX_INDEX(__hdr__) \
	(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)

static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
{
	return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
}

static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
{
	return (struct ext4_extent_header *) bh->b_data;
}

static inline unsigned short ext_depth(struct inode *inode)
{
	return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
}

static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext)
{
	/* We can not have an unwritten extent of zero length! */
	BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
	ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
}

static inline int ext4_ext_is_unwritten(struct ext4_extent *ext)
{
	/* Extent with ee_len of 0x8000 is treated as an initialized extent */
	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}

static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
	return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
		le16_to_cpu(ext->ee_len) :
		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
}

static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
{
	ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}

/*
 * ext4_ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 */
static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
{
	ext4_fsblk_t block;

	block = le32_to_cpu(ex->ee_start_lo);
	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
	return block;
}

/*
 * ext4_idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 */
static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
{
	ext4_fsblk_t block;

	block = le32_to_cpu(ix->ei_leaf_lo);
	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
	return block;
}

/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 */
static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
					 ext4_fsblk_t pb)
{
	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
				      0xffff);
}

/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 */
static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
					 ext4_fsblk_t pb)
{
	ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
				     0xffff);
}
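/*
 * Editor's sketch (not in the original file): a self-contained,
 * userspace-style round trip of the 48-bit split used above, with
 * illustrative values.
 */
#include <assert.h>
#include <stdint.h>

static void pblock_round_trip_demo(void)
{
	uint64_t pb = 0x0123456789abULL;		/* 48-bit physical block */
	uint32_t lo = (uint32_t)(pb & 0xffffffff);	/* like ee_start_lo */
	uint16_t hi = (uint16_t)((pb >> 31) >> 1);	/* like ee_start_hi */

	/* same recombination as ext4_ext_pblock() */
	uint64_t block = lo | ((uint64_t)hi << 31) << 1;
	assert(block == pb);
}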

#define ext4_ext_dirty(handle, inode, path) \
		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
		     struct inode *inode, struct ext4_ext_path *path);

#endif /* _EXT4_EXTENTS */
321
fs/ext4/ext4_jbd2.c
Normal file
@@ -0,0 +1,321 @@
/*
 * Interface between ext4 and JBD
 */

#include "ext4_jbd2.h"

#include <trace/events/ext4.h>

/* Just increment the non-pointer handle value */
static handle_t *ext4_get_nojournal(void)
{
	handle_t *handle = current->journal_info;
	unsigned long ref_cnt = (unsigned long)handle;

	BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);

	ref_cnt++;
	handle = (handle_t *)ref_cnt;

	current->journal_info = handle;
	return handle;
}


/* Decrement the non-pointer handle value */
static void ext4_put_nojournal(handle_t *handle)
{
	unsigned long ref_cnt = (unsigned long)handle;

	BUG_ON(ref_cnt == 0);

	ref_cnt--;
	handle = (handle_t *)ref_cnt;

	current->journal_info = handle;
}
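/*
 * Editor's note (not in the original file): in no-journal mode the
 * "handle" is not a pointer at all but a small nesting counter smuggled
 * through current->journal_info, e.g. two nested starts yield
 * (handle_t *)1 and then (handle_t *)2.  ext4_handle_valid() in
 * ext4_jbd2.h treats any value below EXT4_NOJOURNAL_MAX_REF_COUNT as
 * such a pseudo-handle.
 */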

/*
 * Wrappers for jbd2_journal_start/end.
 */
static int ext4_journal_check_start(struct super_block *sb)
{
	journal_t *journal;

	might_sleep();
	if (sb->s_flags & MS_RDONLY && !ext4_journal_current_handle())
		return -EROFS;
	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
	journal = EXT4_SB(sb)->s_journal;
	/*
	 * Special case here: if the journal has aborted behind our
	 * backs (eg. EIO in the commit thread), then we still need to
	 * take the FS itself readonly cleanly.
	 */
	if (journal && is_journal_aborted(journal)) {
		ext4_abort(sb, "Detected aborted journal");
		return -EROFS;
	}
	return 0;
}

handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
				  int type, int blocks, int rsv_blocks)
{
	journal_t *journal;
	int err;

	trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
	err = ext4_journal_check_start(sb);
	if (err < 0)
		return ERR_PTR(err);

	journal = EXT4_SB(sb)->s_journal;
	if (!journal)
		return ext4_get_nojournal();
	return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
				   type, line);
}

int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
	struct super_block *sb;
	int err;
	int rc;

	if (!ext4_handle_valid(handle)) {
		ext4_put_nojournal(handle);
		return 0;
	}
	sb = handle->h_transaction->t_journal->j_private;
	err = handle->h_err;
	rc = jbd2_journal_stop(handle);

	if (!err)
		err = rc;
	if (err)
		__ext4_std_error(sb, where, line, err);
	return err;
}
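/*
 * Editor's sketch (not in the original file): the typical caller
 * pattern, via the ext4_journal_start()/ext4_journal_stop() macros
 * declared in ext4_jbd2.h; the credit count of 2 is illustrative only.
 *
 *	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	... modify metadata under the handle ...
 *	err = ext4_journal_stop(handle);
 */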

handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
					int type)
{
	struct super_block *sb;
	int err;

	if (!ext4_handle_valid(handle))
		return ext4_get_nojournal();

	sb = handle->h_journal->j_private;
	trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
					  _RET_IP_);
	err = ext4_journal_check_start(sb);
	if (err < 0) {
		jbd2_journal_free_reserved(handle);
		return ERR_PTR(err);
	}

	err = jbd2_journal_start_reserved(handle, type, line);
	if (err < 0)
		return ERR_PTR(err);
	return handle;
}

static void ext4_journal_abort_handle(const char *caller, unsigned int line,
				      const char *err_fn,
				      struct buffer_head *bh,
				      handle_t *handle, int err)
{
	char nbuf[16];
	const char *errstr = ext4_decode_error(NULL, err, nbuf);

	BUG_ON(!ext4_handle_valid(handle));

	if (bh)
		BUFFER_TRACE(bh, "abort");

	if (!handle->h_err)
		handle->h_err = err;

	if (is_handle_aborted(handle))
		return;

	printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
	       caller, line, errstr, err_fn);

	jbd2_journal_abort_handle(handle);
}

int __ext4_journal_get_write_access(const char *where, unsigned int line,
				    handle_t *handle, struct buffer_head *bh)
{
	int err = 0;

	might_sleep();

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_get_write_access(handle, bh);
		if (err)
			ext4_journal_abort_handle(where, line, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * The ext4 forget function must perform a revoke if we are freeing data
 * which has been journaled.  Metadata (eg. indirect blocks) must be
 * revoked in all cases.
 *
 * "bh" may be NULL: a metadata block may have been freed from memory
 * but there may still be a record of it in the journal, and that record
 * still needs to be revoked.
 *
 * If the handle isn't valid we're not journaling, but we still need to
 * call into ext4_journal_revoke() to put the buffer head.
 */
int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
		  int is_metadata, struct inode *inode,
		  struct buffer_head *bh, ext4_fsblk_t blocknr)
{
	int err;

	might_sleep();

	trace_ext4_forget(inode, is_metadata, blocknr);
	BUFFER_TRACE(bh, "enter");

	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
		  "data mode %x\n",
		  bh, is_metadata, inode->i_mode,
		  test_opt(inode->i_sb, DATA_FLAGS));

	/* In the no journal case, we can just do a bforget and return */
	if (!ext4_handle_valid(handle)) {
		bforget(bh);
		return 0;
	}

	/* Never use the revoke function if we are doing full data
	 * journaling: there is no need to, and a V1 superblock won't
	 * support it.  Otherwise, only skip the revoke on un-journaled
	 * data blocks. */
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
	    (!is_metadata && !ext4_should_journal_data(inode))) {
		if (bh) {
			BUFFER_TRACE(bh, "call jbd2_journal_forget");
			err = jbd2_journal_forget(handle, bh);
			if (err)
				ext4_journal_abort_handle(where, line, __func__,
							  bh, handle, err);
			return err;
		}
		return 0;
	}

	/*
	 * data!=journal && (is_metadata || should_journal_data(inode))
	 */
	BUFFER_TRACE(bh, "call jbd2_journal_revoke");
	err = jbd2_journal_revoke(handle, blocknr, bh);
	if (err) {
		ext4_journal_abort_handle(where, line, __func__,
					  bh, handle, err);
		__ext4_abort(inode->i_sb, where, line,
			     "error %d when attempting revoke", err);
	}
	BUFFER_TRACE(bh, "exit");
	return err;
}

int __ext4_journal_get_create_access(const char *where, unsigned int line,
				handle_t *handle, struct buffer_head *bh)
{
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_get_create_access(handle, bh);
		if (err)
			ext4_journal_abort_handle(where, line, __func__,
						  bh, handle, err);
	}
	return err;
}

int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
				 handle_t *handle, struct inode *inode,
				 struct buffer_head *bh)
{
	int err = 0;

	might_sleep();

	set_buffer_meta(bh);
	set_buffer_prio(bh);
	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_dirty_metadata(handle, bh);
		/* Errors can only happen due to aborted journal or a nasty bug */
		if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
			ext4_journal_abort_handle(where, line, __func__, bh,
						  handle, err);
			if (inode == NULL) {
				pr_err("EXT4: jbd2_journal_dirty_metadata "
				       "failed: handle type %u started at "
				       "line %u, credits %u/%u, errcode %d",
				       handle->h_type,
				       handle->h_line_no,
				       handle->h_requested_credits,
				       handle->h_buffer_credits, err);
				return err;
			}
			ext4_error_inode(inode, where, line,
					 bh->b_blocknr,
					 "journal_dirty_metadata failed: "
					 "handle type %u started at line %u, "
					 "credits %u/%u, errcode %d",
					 handle->h_type,
					 handle->h_line_no,
					 handle->h_requested_credits,
					 handle->h_buffer_credits, err);
		}
	} else {
		if (inode)
			mark_buffer_dirty_inode(bh, inode);
		else
			mark_buffer_dirty(bh);
		if (inode && inode_needs_sync(inode)) {
			sync_dirty_buffer(bh);
			if (buffer_req(bh) && !buffer_uptodate(bh)) {
				struct ext4_super_block *es;

				es = EXT4_SB(inode->i_sb)->s_es;
				es->s_last_error_block =
					cpu_to_le64(bh->b_blocknr);
				ext4_error_inode(inode, where, line,
						 bh->b_blocknr,
					"IO error syncing itable block");
				err = -EIO;
			}
		}
	}
	return err;
}

int __ext4_handle_dirty_super(const char *where, unsigned int line,
			      handle_t *handle, struct super_block *sb)
{
	struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
	int err = 0;

	ext4_superblock_csum_set(sb);
	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_dirty_metadata(handle, bh);
		if (err)
			ext4_journal_abort_handle(where, line, __func__,
						  bh, handle, err);
	} else
		mark_buffer_dirty(bh);
	return err;
}
450
fs/ext4/ext4_jbd2.h
Normal file
@@ -0,0 +1,450 @@
/*
 * ext4_jbd2.h
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Ext4-specific journaling extensions.
 */

#ifndef _EXT4_JBD2_H
#define _EXT4_JBD2_H

#include <linux/fs.h>
#include <linux/jbd2.h>
#include "ext4.h"

#define EXT4_JOURNAL(inode)	(EXT4_SB((inode)->i_sb)->s_journal)

/* Define the number of blocks we need to account to a transaction to
 * modify one block of data.
 *
 * We may have to touch one inode, one bitmap buffer, up to three
 * indirection blocks, the group and superblock summaries, and the data
 * block to complete the transaction.
 *
 * For extents-enabled fs we may have to allocate and modify up to
 * 5 levels of tree, data block (for each of these we need bitmap + group
 * summaries), root which is stored in the inode, sb
 */

#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
	(EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
	 ? 20U : 8U)

/* Extended attribute operations touch at most two data buffers,
 * two bitmap buffers, and two group summaries, in addition to the inode
 * and the superblock, which are already accounted for. */

#define EXT4_XATTR_TRANS_BLOCKS		6U

/* Define the minimum size for a transaction which modifies data.  This
 * needs to take into account the fact that we may end up modifying two
 * quota files too (one for the group, one for the user quota).  The
 * superblock only gets updated once, of course, so don't bother
 * counting that again for the quota updates. */

#define EXT4_DATA_TRANS_BLOCKS(sb)	(EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
					 EXT4_XATTR_TRANS_BLOCKS - 2 + \
					 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
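/*
 * Editor's worked example (not in the original file): with quotas
 * disabled, EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) is 0, so on an
 * extents-enabled filesystem EXT4_DATA_TRANS_BLOCKS(sb)
 * = 20 + 6 - 2 + 0 = 24 credits, and on a non-extents one
 * = 8 + 6 - 2 + 0 = 12 credits.
 */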

/*
 * Define the number of metadata blocks we need to account to modify data.
 *
 * This includes super block, inode block, quota blocks and xattr blocks
 */
#define EXT4_META_TRANS_BLOCKS(sb)	(EXT4_XATTR_TRANS_BLOCKS + \
					EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))

/* Define an arbitrary limit for the amount of data we will anticipate
 * writing to any given transaction.  For unbounded transactions such as
 * write(2) and truncate(2) we can write more than this, but we always
 * start off at the maximum transaction size and grow the transaction
 * optimistically as we go. */

#define EXT4_MAX_TRANS_DATA		64U

/* We break up a large truncate or write transaction once the handle's
 * buffer credits gets this low, we need either to extend the
 * transaction or to start a new one.  Reserve enough space here for
 * inode, bitmap, superblock, group and indirection updates for at least
 * one block, plus two quota updates.  Quota allocations are not
 * needed. */

#define EXT4_RESERVE_TRANS_BLOCKS	12U

#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	8

#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
 * allocated so we need to update only data block */
#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
		1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes
 * but inode, sb and group updates are done only once */
#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
		(DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
		 +3+DQUOT_INIT_REWRITE) : 0)

#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
		(DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
		 +3+DQUOT_DEL_REWRITE) : 0)
#else
#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
#endif
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))

static inline int ext4_jbd2_credits_xattr(struct inode *inode)
{
	int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);

	/*
	 * In case of inline data, we may push out the data to a block,
	 * so we need to reserve credits for this eventuality
	 */
	if (ext4_has_inline_data(inode))
		credits += ext4_writepage_trans_blocks(inode) + 1;
	return credits;
}


/*
 * Ext4 handle operation types -- for logging purposes
 */
#define EXT4_HT_MISC             0
#define EXT4_HT_INODE            1
#define EXT4_HT_WRITE_PAGE       2
#define EXT4_HT_MAP_BLOCKS       3
#define EXT4_HT_DIR              4
#define EXT4_HT_TRUNCATE         5
#define EXT4_HT_QUOTA            6
#define EXT4_HT_RESIZE           7
#define EXT4_HT_MIGRATE          8
#define EXT4_HT_MOVE_EXTENTS     9
#define EXT4_HT_XATTR           10
#define EXT4_HT_EXT_CONVERT     11
#define EXT4_HT_MAX             12

/**
 *   struct ext4_journal_cb_entry - Base structure for callback information.
 *
 *   This struct is a 'seed' structure for use with your own callback
 *   structs. If you are using callbacks you must allocate one of these
 *   or another struct of your own definition which has this struct
 *   as its first element and pass it to ext4_journal_callback_add().
 */
struct ext4_journal_cb_entry {
	/* list information for other callbacks attached to the same handle */
	struct list_head jce_list;

	/*  Function to call with this callback structure */
	void (*jce_func)(struct super_block *sb,
			 struct ext4_journal_cb_entry *jce, int error);

	/* user data goes here */
};

/**
 * ext4_journal_callback_add: add a function to call after transaction commit
 * @handle: active journal transaction handle to register callback on
 * @func: callback function to call after the transaction has committed:
 *        @sb: superblock of current filesystem for transaction
 *        @jce: returned journal callback data
 *        @rc: journal state at commit (0 = transaction committed properly)
 * @jce: journal callback data (internal and function private data struct)
 *
 * The registered function will be called in the context of the journal thread
 * after the transaction for which the handle was created has completed.
 *
 * No locks are held when the callback function is called, so it is safe to
 * call blocking functions from within the callback, but the callback should
 * not block or run for too long, or the filesystem will be blocked waiting for
 * the next transaction to commit. No journaling functions can be used, or
 * there is a risk of deadlock.
 *
 * There is no guaranteed calling order of multiple registered callbacks on
 * the same transaction.
 */
static inline void ext4_journal_callback_add(handle_t *handle,
			void (*func)(struct super_block *sb,
				     struct ext4_journal_cb_entry *jce,
				     int rc),
			struct ext4_journal_cb_entry *jce)
{
	struct ext4_sb_info *sbi =
			EXT4_SB(handle->h_transaction->t_journal->j_private);

	/* Add the jce to transaction's private list */
	jce->jce_func = func;
	spin_lock(&sbi->s_md_lock);
	list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
	spin_unlock(&sbi->s_md_lock);
}

/**
 * ext4_journal_callback_del: delete a registered callback
 * @handle: active journal transaction handle on which callback was registered
 * @jce: registered journal callback entry to unregister
 * Return true if object was successfully removed
 */
static inline bool ext4_journal_callback_try_del(handle_t *handle,
					     struct ext4_journal_cb_entry *jce)
{
	bool deleted;
	struct ext4_sb_info *sbi =
			EXT4_SB(handle->h_transaction->t_journal->j_private);

	spin_lock(&sbi->s_md_lock);
	deleted = !list_empty(&jce->jce_list);
	list_del_init(&jce->jce_list);
	spin_unlock(&sbi->s_md_lock);
	return deleted;
}
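/*
 * Editor's sketch (not in the original file; caller names hypothetical):
 * embed the entry as the first member of your own struct, register it
 * while the handle is active, and recover the container in the callback.
 *
 *	struct my_commit_work {
 *		struct ext4_journal_cb_entry jce;	// must be first
 *		unsigned long ino;
 *	};
 *
 *	static void my_commit_cb(struct super_block *sb,
 *				 struct ext4_journal_cb_entry *jce, int rc)
 *	{
 *		struct my_commit_work *work = (struct my_commit_work *)jce;
 *		// runs in the journal thread after commit; rc == 0 on success
 *		kfree(work);
 *	}
 *
 *	... while the handle is active:
 *	ext4_journal_callback_add(handle, my_commit_cb, &work->jce);
 */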

int
ext4_mark_iloc_dirty(handle_t *handle,
		     struct inode *inode,
		     struct ext4_iloc *iloc);

/*
 * On success, we end up with an outstanding reference count against
 * iloc->bh.  This _must_ be cleaned up later.
 */

int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
			struct ext4_iloc *iloc);

int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);

/*
 * Wrapper functions with which ext4 calls into JBD.
 */
int __ext4_journal_get_write_access(const char *where, unsigned int line,
				    handle_t *handle, struct buffer_head *bh);

int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
		  int is_metadata, struct inode *inode,
		  struct buffer_head *bh, ext4_fsblk_t blocknr);

int __ext4_journal_get_create_access(const char *where, unsigned int line,
				handle_t *handle, struct buffer_head *bh);

int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
				 handle_t *handle, struct inode *inode,
				 struct buffer_head *bh);

int __ext4_handle_dirty_super(const char *where, unsigned int line,
			      handle_t *handle, struct super_block *sb);

#define ext4_journal_get_write_access(handle, bh) \
	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
	__ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
		      (bh), (block_nr))
#define ext4_journal_get_create_access(handle, bh) \
	__ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
	__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
				     (bh))
#define ext4_handle_dirty_super(handle, sb) \
	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))

handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
				  int type, int blocks, int rsv_blocks);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);

#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)

/* Note:  Do not use this for NULL handles.  This is only to determine if
 * a properly allocated handle is using a journal or not. */
static inline int ext4_handle_valid(handle_t *handle)
{
	if ((unsigned long)handle < EXT4_NOJOURNAL_MAX_REF_COUNT)
		return 0;
	return 1;
}

static inline void ext4_handle_sync(handle_t *handle)
{
	if (ext4_handle_valid(handle))
		handle->h_sync = 1;
}

static inline int ext4_handle_is_aborted(handle_t *handle)
{
	if (ext4_handle_valid(handle))
		return is_handle_aborted(handle);
	return 0;
}

static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
{
	if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
		return 0;
	return 1;
}

#define ext4_journal_start_sb(sb, type, nblocks)			\
	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)

#define ext4_journal_start(inode, type, nblocks)			\
	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)

#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))

static inline handle_t *__ext4_journal_start(struct inode *inode,
					     unsigned int line, int type,
					     int blocks, int rsv_blocks)
{
	return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
				       rsv_blocks);
}

#define ext4_journal_stop(handle) \
	__ext4_journal_stop(__func__, __LINE__, (handle))

#define ext4_journal_start_reserved(handle, type) \
	__ext4_journal_start_reserved((handle), __LINE__, (type))

handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
					int type);

static inline void ext4_journal_free_reserved(handle_t *handle)
{
	if (ext4_handle_valid(handle))
		jbd2_journal_free_reserved(handle);
}

static inline handle_t *ext4_journal_current_handle(void)
{
	return journal_current_handle();
}

static inline int ext4_journal_extend(handle_t *handle, int nblocks)
{
	if (ext4_handle_valid(handle))
		return jbd2_journal_extend(handle, nblocks);
	return 0;
}

static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
	if (ext4_handle_valid(handle))
		return jbd2_journal_restart(handle, nblocks);
	return 0;
}

static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) != NULL)
		return jbd2_journal_blocks_per_page(inode);
	return 0;
}

static inline int ext4_journal_force_commit(journal_t *journal)
{
	if (journal)
		return jbd2_journal_force_commit(journal);
	return 0;
}

static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
	if (ext4_handle_valid(handle))
		return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
	return 0;
}

static inline void ext4_update_inode_fsync_trans(handle_t *handle,
						 struct inode *inode,
						 int datasync)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (ext4_handle_valid(handle)) {
		ei->i_sync_tid = handle->h_transaction->t_tid;
		if (datasync)
			ei->i_datasync_tid = handle->h_transaction->t_tid;
	}
}

/* super.c */
int ext4_force_commit(struct super_block *sb);

/*
 * Ext4 inode journal modes
 */
#define EXT4_INODE_JOURNAL_DATA_MODE	0x01 /* journal data mode */
#define EXT4_INODE_ORDERED_DATA_MODE	0x02 /* ordered data mode */
#define EXT4_INODE_WRITEBACK_DATA_MODE	0x04 /* writeback data mode */

static inline int ext4_inode_journal_mode(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) == NULL)
		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
	/* We do not support data journalling with delayed allocation */
	if (!S_ISREG(inode->i_mode) ||
	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
	    !test_opt(inode->i_sb, DELALLOC))
		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
		return EXT4_INODE_ORDERED_DATA_MODE;	/* ordered */
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
	else
		BUG();
}

static inline int ext4_should_journal_data(struct inode *inode)
{
	return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
}

static inline int ext4_should_order_data(struct inode *inode)
{
	return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
}

static inline int ext4_should_writeback_data(struct inode *inode)
{
	return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}

/*
 * This function controls whether or not we should try to go down the
 * dioread_nolock code paths, which makes it safe to avoid taking
 * i_mutex for direct I/O reads.  This only works for extent-based
 * files, and it doesn't work if data journaling is enabled, since the
 * dioread_nolock code uses b_private to pass information back to the
 * I/O completion handler, and this conflicts with the jbd's use of
 * b_private.
 */
static inline int ext4_should_dioread_nolock(struct inode *inode)
{
	if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
		return 0;
	if (!S_ISREG(inode->i_mode))
		return 0;
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return 0;
	if (ext4_should_journal_data(inode))
		return 0;
	return 1;
}

#endif	/* _EXT4_JBD2_H */
5742
fs/ext4/extents.c
Normal file
File diff suppressed because it is too large

1299
fs/ext4/extents_status.c
Normal file
File diff suppressed because it is too large

157
fs/ext4/extents_status.h
Normal file
@@ -0,0 +1,157 @@
/*
 *  fs/ext4/extents_status.h
 *
 * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
 * Modified by
 *	Allison Henderson <achender@linux.vnet.ibm.com>
 *	Zheng Liu <wenqing.lz@taobao.com>
 *
 */

#ifndef _EXT4_EXTENTS_STATUS_H
#define _EXT4_EXTENTS_STATUS_H

/*
 * Turn on ES_DEBUG__ to get lots of info about extent status operations.
 */
#ifdef ES_DEBUG__
#define es_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
#else
#define es_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

/*
 * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
 * checked with old map_block's result.
 */
#define ES_AGGRESSIVE_TEST__

/*
 * These flags live in the high bits of extent_status.es_pblk
 */
#define ES_SHIFT	60

#define EXTENT_STATUS_WRITTEN	(1 << 3)
#define EXTENT_STATUS_UNWRITTEN	(1 << 2)
#define EXTENT_STATUS_DELAYED	(1 << 1)
#define EXTENT_STATUS_HOLE	(1 << 0)

#define EXTENT_STATUS_FLAGS	(EXTENT_STATUS_WRITTEN | \
				 EXTENT_STATUS_UNWRITTEN | \
				 EXTENT_STATUS_DELAYED | \
				 EXTENT_STATUS_HOLE)

#define ES_WRITTEN		(1ULL << 63)
#define ES_UNWRITTEN		(1ULL << 62)
#define ES_DELAYED		(1ULL << 61)
#define ES_HOLE			(1ULL << 60)

#define ES_MASK			(ES_WRITTEN | ES_UNWRITTEN | \
				 ES_DELAYED | ES_HOLE)
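
/*
 * Example: a written extent whose first physical block is 1000 is
 * stored as es_pblk = ES_WRITTEN | 1000.  ext4_es_pblock() below masks
 * off the top four status bits to recover the block number, and
 * ext4_es_status() shifts them back down to the EXTENT_STATUS_* form.
 */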

struct ext4_sb_info;
struct ext4_extent;

struct extent_status {
	struct rb_node rb_node;
	ext4_lblk_t es_lblk;	/* first logical block extent covers */
	ext4_lblk_t es_len;	/* length of extent in blocks */
	ext4_fsblk_t es_pblk;	/* first physical block */
};

struct ext4_es_tree {
	struct rb_root root;
	struct extent_status *cache_es;	/* recently accessed extent */
};

struct ext4_es_stats {
	unsigned long es_stats_last_sorted;
	unsigned long es_stats_shrunk;
	unsigned long es_stats_cache_hits;
	unsigned long es_stats_cache_misses;
	u64 es_stats_scan_time;
	u64 es_stats_max_scan_time;
	struct percpu_counter es_stats_all_cnt;
	struct percpu_counter es_stats_lru_cnt;
};

extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);

extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len, ext4_fsblk_t pblk,
				 unsigned int status);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len, ext4_fsblk_t pblk,
				 unsigned int status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len);
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
					ext4_lblk_t lblk, ext4_lblk_t end,
					struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
				 struct extent_status *es);

static inline int ext4_es_is_written(struct extent_status *es)
{
	return (es->es_pblk & ES_WRITTEN) != 0;
}

static inline int ext4_es_is_unwritten(struct extent_status *es)
{
	return (es->es_pblk & ES_UNWRITTEN) != 0;
}

static inline int ext4_es_is_delayed(struct extent_status *es)
{
	return (es->es_pblk & ES_DELAYED) != 0;
}

static inline int ext4_es_is_hole(struct extent_status *es)
{
	return (es->es_pblk & ES_HOLE) != 0;
}

static inline unsigned int ext4_es_status(struct extent_status *es)
{
	return es->es_pblk >> ES_SHIFT;
}

static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
{
	return es->es_pblk & ~ES_MASK;
}

static inline void ext4_es_store_pblock(struct extent_status *es,
					ext4_fsblk_t pb)
{
	ext4_fsblk_t block;

	block = (pb & ~ES_MASK) | (es->es_pblk & ES_MASK);
	es->es_pblk = block;
}

static inline void ext4_es_store_status(struct extent_status *es,
					unsigned int status)
{
	es->es_pblk = (((ext4_fsblk_t)
			(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
		       (es->es_pblk & ~ES_MASK));
}

static inline void ext4_es_store_pblock_status(struct extent_status *es,
					       ext4_fsblk_t pb,
					       unsigned int status)
{
	es->es_pblk = (((ext4_fsblk_t)
			(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
		       (pb & ~ES_MASK));
}

extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);

#endif /* _EXT4_EXTENTS_STATUS_H */
614
fs/ext4/file.c
Normal file
@@ -0,0 +1,614 @@
/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/aio.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
	    (atomic_read(&inode->i_writecount) == 1) &&
	    !EXT4_I(inode)->i_reserved_data_blocks) {
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

static void ext4_unwritten_wait(struct inode *inode)
{
	wait_queue_head_t *wq = ext4_ioend_wq(inode);

	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;

	if (pos >= i_size_read(inode))
		return 0;

	if ((pos | iov_iter_alignment(from)) & blockmask)
		return 1;

	return 0;
}
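
/*
 * Example: with a 4096-byte block size, blockmask is 0xfff; an AIO
 * write at pos 6144 (6144 & 0xfff == 2048) trips the mask and is
 * treated as unaligned, so ext4_file_write_iter() below serializes it
 * against other in-flight unwritten-extent I/O.
 */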

static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(iocb->ki_filp);
	struct mutex *aio_mutex = NULL;
	struct blk_plug plug;
	int o_direct = file->f_flags & O_DIRECT;
	int overwrite = 0;
	size_t length = iov_iter_count(from);
	ssize_t ret;
	loff_t pos = iocb->ki_pos;

	/*
	 * Unaligned direct AIO must be serialized; see the comment above.
	 * In the case of O_APPEND, assume that we must always serialize.
	 */
	if (o_direct &&
	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	    !is_sync_kiocb(iocb) &&
	    (file->f_flags & O_APPEND ||
	     ext4_unaligned_aio(inode, from, pos))) {
		aio_mutex = ext4_aio_mutex(inode);
		mutex_lock(aio_mutex);
		ext4_unwritten_wait(inode);
	}

	mutex_lock(&inode->i_mutex);
	if (file->f_flags & O_APPEND)
		iocb->ki_pos = pos = i_size_read(inode);

	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if ((pos > sbi->s_bitmap_maxbytes) ||
		    (pos == sbi->s_bitmap_maxbytes && length > 0)) {
			mutex_unlock(&inode->i_mutex);
			ret = -EFBIG;
			goto errout;
		}

		if (pos + length > sbi->s_bitmap_maxbytes)
			iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
	}

	iocb->private = &overwrite;
	if (o_direct) {
		blk_start_plug(&plug);

		/* check whether we do a DIO overwrite or not */
		if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
		    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
			struct ext4_map_blocks map;
			unsigned int blkbits = inode->i_blkbits;
			int err, len;

			map.m_lblk = pos >> blkbits;
			map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
				- map.m_lblk;
			len = map.m_len;

			err = ext4_map_blocks(NULL, inode, &map, 0);
			/*
			 * 'err == len' means that all of the blocks have
			 * been preallocated, no matter whether they are
			 * initialized or not.  To exclude unwritten
			 * extents, we also need to check m_flags.  There
			 * are two conditions that indicate an initialized
			 * extent: 1) If we hit the extent cache, the
			 * EXT4_MAP_MAPPED flag is returned; 2) If we do a
			 * real lookup, non-flags are returned.  So we
			 * should check both conditions.
			 */
			if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
				overwrite = 1;
		}
	}

	ret = __generic_file_write_iter(iocb, from);
	mutex_unlock(&inode->i_mutex);

	if (ret > 0) {
		ssize_t err;

		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	if (o_direct)
		blk_finish_plug(&plug);

errout:
	if (aio_mutex)
		mutex_unlock(aio_mutex);
	return ret;
}

static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= ext4_page_mkwrite,
	.remap_pages	= generic_file_remap_pages,
};

static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &ext4_file_vm_ops;
	return 0;
}

static int ext4_file_open(struct inode *inode, struct file *filp)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct vfsmount *mnt = filp->f_path.mnt;
	struct path path;
	char buf[64], *cp;

	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
		     !(sb->s_flags & MS_RDONLY))) {
		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
		/*
		 * Sample where the filesystem has been mounted and
		 * store it in the superblock for sysadmin convenience
		 * when trying to sort through large numbers of block
		 * devices or filesystem images.
		 */
		memset(buf, 0, sizeof(buf));
		path.mnt = mnt;
		path.dentry = mnt->mnt_root;
		cp = d_path(&path, buf, sizeof(buf));
		if (!IS_ERR(cp)) {
			handle_t *handle;
			int err;

			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
			if (IS_ERR(handle))
				return PTR_ERR(handle);
			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
			if (err) {
				ext4_journal_stop(handle);
				return err;
			}
			strlcpy(sbi->s_es->s_last_mounted, cp,
				sizeof(sbi->s_es->s_last_mounted));
			ext4_handle_dirty_super(handle, sb);
			ext4_journal_stop(handle);
		}
	}
	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
	 */
	if (filp->f_mode & FMODE_WRITE) {
		int ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}
	return dquot_file_open(inode, filp);
}

/*
 * Here we use ext4_map_blocks() to get a block mapping for an extent-based
 * file rather than ext4_ext_walk_space() because we can introduce
 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped files in the same
 * function.  When the extent status tree has been fully implemented, it will
 * track all extent status for a file and we can directly use it to
 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
 */

/*
 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we need to look up
 * the page cache to check whether or not there is data between
 * [startoff, endoff] because, if this range contains an unwritten extent,
 * we determine this extent to be data or a hole according to whether the
 * page cache has data or not.
 */
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     struct ext4_map_blocks *map,
				     loff_t *offset)
{
	struct pagevec pvec;
	unsigned int blkbits;
	pgoff_t index;
	pgoff_t end;
	loff_t endoff;
	loff_t startoff;
	loff_t lastoff;
	int found = 0;

	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;

	index = startoff >> PAGE_CACHE_SHIFT;
	end = endoff >> PAGE_CACHE_SHIFT;

	pagevec_init(&pvec, 0);
	do {
		int i, num;
		unsigned long nr_pages;

		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  (pgoff_t)num);
		if (nr_pages == 0) {
			if (whence == SEEK_DATA)
				break;

			BUG_ON(whence != SEEK_HOLE);
			/*
			 * If this is the first time into the loop and
			 * the offset is not beyond the end offset, it
			 * is a hole at this offset.
			 */
			if (lastoff == startoff || lastoff < endoff)
				found = 1;
			break;
		}

		/*
		 * If this is the first time into the loop and the offset is
		 * smaller than the first page offset, it is a hole at this
		 * offset.
		 */
		if (lastoff == startoff && whence == SEEK_HOLE &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = 1;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			/*
			 * If the current offset is not beyond the end of the
			 * given range, it is a hole.
			 */
			if (lastoff < endoff && whence == SEEK_HOLE &&
			    page->index > end) {
				found = 1;
				*offset = lastoff;
				goto out;
			}

			lock_page(page);

			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			if (page_has_buffers(page)) {
				lastoff = page_offset(page);
				bh = head = page_buffers(page);
				do {
					if (buffer_uptodate(bh) ||
					    buffer_unwritten(bh)) {
						if (whence == SEEK_DATA)
							found = 1;
					} else {
						if (whence == SEEK_HOLE)
							found = 1;
					}
					if (found) {
						*offset = max_t(loff_t,
							startoff, lastoff);
						unlock_page(page);
						goto out;
					}
					lastoff += bh->b_size;
					bh = bh->b_this_page;
				} while (bh != head);
			}

			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * Fewer pages were found than we asked for; that means
		 * there is a hole in this range.
		 */
		if (nr_pages < num && whence == SEEK_HOLE) {
			found = 1;
			*offset = lastoff;
			break;
		}

		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}

/*
 * ext4_seek_data() retrieves the offset for SEEK_DATA.
 */
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
	int ret = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize) {
		mutex_unlock(&inode->i_mutex);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	dataoff = offset;

	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			if (last != start)
				dataoff = (loff_t)last << blkbits;
			break;
		}

		/*
		 * If there is a delayed extent at this offset,
		 * it is treated as data.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
			if (last != start)
				dataoff = (loff_t)last << blkbits;
			break;
		}

		/*
		 * If there is an unwritten extent at this offset, it is
		 * treated as data or a hole according to whether the
		 * page cache has data for it or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
							      &map, &dataoff);
			if (unwritten)
				break;
		}

		last++;
		dataoff = (loff_t)last << blkbits;
	} while (last <= end);

	mutex_unlock(&inode->i_mutex);

	if (dataoff > isize)
		return -ENXIO;

	return vfs_setpos(file, dataoff, maxsize);
}

/*
 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
	int ret = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize) {
		mutex_unlock(&inode->i_mutex);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	holeoff = offset;

	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			last += ret;
			holeoff = (loff_t)last << blkbits;
			continue;
		}

		/*
		 * If there is a delayed extent at this offset,
		 * we will skip this extent.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
			last = es.es_lblk + es.es_len;
			holeoff = (loff_t)last << blkbits;
			continue;
		}

		/*
		 * If there is an unwritten extent at this offset, it is
		 * treated as data or a hole according to whether the
		 * page cache has data for it or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
							      &map, &holeoff);
			if (!unwritten) {
				last += ret;
				holeoff = (loff_t)last << blkbits;
				continue;
			}
		}

		/* found a hole */
		break;
	} while (last <= end);

	mutex_unlock(&inode->i_mutex);

	if (holeoff > isize)
		holeoff = isize;

	return vfs_setpos(file, holeoff, maxsize);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes;

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	else
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
		return ext4_seek_data(file, offset, maxbytes);
	case SEEK_HOLE:
		return ext4_seek_hole(file, offset, maxbytes);
	}

	return -EINVAL;
}

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read		= new_sync_read,
	.write		= new_sync_write,
	.read_iter	= generic_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl	= ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};
151
fs/ext4/fsync.c
Normal file
@@ -0,0 +1,151 @@
/*
 *  linux/fs/ext4/fsync.c
 *
 *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
 *  from
 *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
 *                      Laboratoire MASI - Institut Blaise Pascal
 *                      Universite Pierre et Marie Curie (Paris VI)
 *  from
 *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4fs fsync primitive
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *
 *  Removed unnecessary code duplication for little endian machines
 *  and excessive __inline__s.
 *        Andi Kleen, 1997
 *
 * Major simplifications and cleanup - we only need to do the metadata, because
 * we can depend on generic_block_fdatasync() to sync the data blocks.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/jbd2.h>
#include <linux/blkdev.h>

#include "ext4.h"
#include "ext4_jbd2.h"

#include <trace/events/ext4.h>

/*
 * If we're not journaling and this is a just-created file, we have to
 * sync our parent directory (if it was freshly created) since
 * otherwise it will only be written by writeback, leaving a huge
 * window during which a crash may lose the file.  This may apply for
 * the parent directory's parent as well, and so on recursively, if
 * they are also freshly created.
 */
static int ext4_sync_parent(struct inode *inode)
{
	struct dentry *dentry = NULL;
	struct inode *next;
	int ret = 0;

	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
		return 0;
	inode = igrab(inode);
	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
		dentry = d_find_any_alias(inode);
		if (!dentry)
			break;
		next = igrab(dentry->d_parent->d_inode);
		dput(dentry);
		if (!next)
			break;
		iput(inode);
		inode = next;
		ret = sync_mapping_buffers(inode->i_mapping);
		if (ret)
			break;
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			break;
	}
	iput(inode);
	return ret;
}
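
/*
 * Example: in no-journal mode, after "mkdir -p a/b" and then creating
 * a/b/f, an fsync() of f syncs directory b, and then a as well if it
 * is also freshly created, so a crash cannot lose the path leading to
 * the new file.
 */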

/*
 * akpm: A new design for ext4_sync_file().
 *
 * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
 * There cannot be a transaction open by this task.
 * Another task could have dirtied this inode.  Its data can be in any
 * state in the journalling system.
 *
 * What we do is just kick off a commit and wait on it.  This will snapshot the
 * inode to disk.
 */

int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_inode_info *ei = EXT4_I(inode);
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	int ret = 0, err;
	tid_t commit_tid;
	bool needs_barrier = false;

	J_ASSERT(ext4_journal_current_handle() == NULL);

	trace_ext4_sync_file_enter(file, datasync);

	if (inode->i_sb->s_flags & MS_RDONLY) {
		/* Make sure that we read updated s_mount_flags value */
		smp_rmb();
		if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
			ret = -EROFS;
		goto out;
	}

	if (!journal) {
		ret = generic_file_fsync(file, start, end, datasync);
		if (!ret && !hlist_empty(&inode->i_dentry))
			ret = ext4_sync_parent(inode);
		goto out;
	}

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (ret)
		return ret;
	/*
	 * data=writeback,ordered:
	 *  The caller's filemap_fdatawrite()/wait will sync the data.
	 *  Metadata is in the journal, we wait for proper transaction to
	 *  commit here.
	 *
	 * data=journal:
	 *  filemap_fdatawrite won't do anything (the buffers are clean).
	 *  ext4_force_commit will write the file data into the journal and
	 *  will wait on that.
	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
	 *  (they were dirtied by commit).  But that's OK - the blocks are
	 *  safe in-journal, which is all fsync() needs to ensure.
	 */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		goto out;
	}

	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
	if (journal->j_flags & JBD2_BARRIER &&
	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
		needs_barrier = true;
	ret = jbd2_complete_transaction(journal, commit_tid);
	if (needs_barrier) {
		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
		if (!ret)
			ret = err;
	}
out:
	trace_ext4_sync_file_exit(inode, ret);
	return ret;
}
208
fs/ext4/hash.c
Normal file
@@ -0,0 +1,208 @@
/*
 *  linux/fs/ext4/hash.c
 *
 * Copyright (C) 2002 by Theodore Ts'o
 *
 * This file is released under the GPL v2.
 *
 * This file may be redistributed under the terms of the GNU Public
 * License.
 */

#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/cryptohash.h>
#include "ext4.h"

#define DELTA 0x9E3779B9

static void TEA_transform(__u32 buf[4], __u32 const in[])
{
	__u32	sum = 0;
	__u32	b0 = buf[0], b1 = buf[1];
	__u32	a = in[0], b = in[1], c = in[2], d = in[3];
	int	n = 16;

	do {
		sum += DELTA;
		b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
		b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
	} while (--n);

	buf[0] += b0;
	buf[1] += b1;
}
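
/*
 * The loop above is a 16-cycle variant of the TEA block-cipher round
 * with the usual golden-ratio constant DELTA: b0/b1 carry the evolving
 * 64-bit hash state, and in[0..3] act as the 128-bit "key" derived
 * from the filename by str2hashbuf below.
 */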

/* The old legacy hash */
static __u32 dx_hack_hash_unsigned(const char *name, int len)
{
	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
	const unsigned char *ucp = (const unsigned char *) name;

	while (len--) {
		hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));

		if (hash & 0x80000000)
			hash -= 0x7fffffff;
		hash1 = hash0;
		hash0 = hash;
	}
	return hash0 << 1;
}

static __u32 dx_hack_hash_signed(const char *name, int len)
{
	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
	const signed char *scp = (const signed char *) name;

	while (len--) {
		hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));

		if (hash & 0x80000000)
			hash -= 0x7fffffff;
		hash1 = hash0;
		hash0 = hash;
	}
	return hash0 << 1;
}

static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
{
	__u32	pad, val;
	int	i;
	const signed char *scp = (const signed char *) msg;

	pad = (__u32)len | ((__u32)len << 8);
	pad |= pad << 16;

	val = pad;
	if (len > num*4)
		len = num * 4;
	for (i = 0; i < len; i++) {
		if ((i % 4) == 0)
			val = pad;
		val = ((int) scp[i]) + (val << 8);
		if ((i % 4) == 3) {
			*buf++ = val;
			val = pad;
			num--;
		}
	}
	if (--num >= 0)
		*buf++ = val;
	while (--num >= 0)
		*buf++ = pad;
}

static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
{
	__u32	pad, val;
	int	i;
	const unsigned char *ucp = (const unsigned char *) msg;

	pad = (__u32)len | ((__u32)len << 8);
	pad |= pad << 16;

	val = pad;
	if (len > num*4)
		len = num * 4;
	for (i = 0; i < len; i++) {
		if ((i % 4) == 0)
			val = pad;
		val = ((int) ucp[i]) + (val << 8);
		if ((i % 4) == 3) {
			*buf++ = val;
			val = pad;
			num--;
		}
	}
	if (--num >= 0)
		*buf++ = val;
	while (--num >= 0)
		*buf++ = pad;
}

/*
 * Returns the hash of a filename.  If len is 0 and name is NULL, then
 * this function can be used to test whether or not a hash version is
 * supported.
 *
 * The seed is a 4 longword (32 bits) "secret" which can be used to
 * uniquify a hash.  If the seed is all zeros, then some default seed
 * may be used.
 *
 * A particular hash version specifies whether or not the seed is
 * represented, and whether or not the returned hash is 32 bits or 64
 * bits.  32 bit hashes will return 0 for the minor hash.
 */
int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
{
	__u32	hash;
	__u32	minor_hash = 0;
	const char	*p;
	int		i;
	__u32		in[8], buf[4];
	void		(*str2hashbuf)(const char *, int, __u32 *, int) =
				str2hashbuf_signed;

	/* Initialize the default seed for the hash checksum functions */
	buf[0] = 0x67452301;
	buf[1] = 0xefcdab89;
	buf[2] = 0x98badcfe;
	buf[3] = 0x10325476;

	/* Check to see if the seed is all zeros */
	if (hinfo->seed) {
		for (i = 0; i < 4; i++) {
			if (hinfo->seed[i]) {
				memcpy(buf, hinfo->seed, sizeof(buf));
				break;
			}
		}
	}

	switch (hinfo->hash_version) {
	case DX_HASH_LEGACY_UNSIGNED:
		hash = dx_hack_hash_unsigned(name, len);
		break;
	case DX_HASH_LEGACY:
		hash = dx_hack_hash_signed(name, len);
		break;
	case DX_HASH_HALF_MD4_UNSIGNED:
		str2hashbuf = str2hashbuf_unsigned;
		/* fall through */
	case DX_HASH_HALF_MD4:
		p = name;
		while (len > 0) {
			(*str2hashbuf)(p, len, in, 8);
			half_md4_transform(buf, in);
			len -= 32;
			p += 32;
		}
		minor_hash = buf[2];
		hash = buf[1];
		break;
	case DX_HASH_TEA_UNSIGNED:
		str2hashbuf = str2hashbuf_unsigned;
		/* fall through */
	case DX_HASH_TEA:
		p = name;
		while (len > 0) {
			(*str2hashbuf)(p, len, in, 4);
			TEA_transform(buf, in);
			len -= 16;
			p += 16;
		}
		hash = buf[0];
		minor_hash = buf[1];
		break;
	default:
		hinfo->hash = 0;
		return -1;
	}
	hash = hash & ~1;
	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
	hinfo->hash = hash;
	hinfo->minor_hash = minor_hash;
	return 0;
}
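
/*
 * Usage sketch (illustrative; the real callers live in namei.c):
 *
 *	struct dx_hash_info hinfo;
 *
 *	hinfo.hash_version = DX_HASH_HALF_MD4;
 *	hinfo.seed = EXT4_SB(sb)->s_hash_seed;
 *	ext4fs_dirhash(de->name, de->name_len, &hinfo);
 *
 * hinfo.hash and hinfo.minor_hash then select the htree leaf block
 * for the entry.
 */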
1331
fs/ext4/ialloc.c
Normal file
File diff suppressed because it is too large
1549
fs/ext4/indirect.c
Normal file
File diff suppressed because it is too large
2007
fs/ext4/inline.c
Normal file
File diff suppressed because it is too large
5096
fs/ext4/inode.c
Normal file
File diff suppressed because it is too large
700
fs/ext4/ioctl.c
Normal file
@@ -0,0 +1,700 @@
/*
 * linux/fs/ext4/ioctl.c
 *
 * Copyright (C) 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 */

#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"

#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)

/**
 * Swap memory between @a and @b for @len bytes.
 *
 * @a:   pointer to first memory area
 * @b:   pointer to second memory area
 * @len: number of bytes to swap
 *
 */
static void memswap(void *a, void *b, size_t len)
{
	unsigned char *ap, *bp;
	unsigned char tmp;

	ap = (unsigned char *)a;
	bp = (unsigned char *)b;
	while (len-- > 0) {
		tmp = *ap;
		*ap = *bp;
		*bp = tmp;
		ap++;
		bp++;
	}
}

/**
 * Swap i_data and associated attributes between @inode1 and @inode2.
 * This function is used for the primary swap between inode1 and inode2
 * and also to revert this primary swap in case of errors.
 *
 * Therefore you have to make sure that calling this method twice
 * will revert all changes.
 *
 * @inode1: pointer to first inode
 * @inode2: pointer to second inode
 */
static void swap_inode_data(struct inode *inode1, struct inode *inode2)
{
	loff_t isize;
	struct ext4_inode_info *ei1;
	struct ext4_inode_info *ei2;

	ei1 = EXT4_I(inode1);
	ei2 = EXT4_I(inode2);

	memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
	memswap(&inode1->i_version, &inode2->i_version,
		sizeof(inode1->i_version));
	memswap(&inode1->i_blocks, &inode2->i_blocks,
		sizeof(inode1->i_blocks));
	memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
	memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
	memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));

	memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
	memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
	ext4_es_lru_del(inode1);
	ext4_es_lru_del(inode2);

	isize = i_size_read(inode1);
	i_size_write(inode1, i_size_read(inode2));
	i_size_write(inode2, isize);
}

/**
 * Swap the information from the given @inode and the inode
 * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
 * important fields of the inodes.
 *
 * @sb:    the super block of the filesystem
 * @inode: the inode to swap with EXT4_BOOT_LOADER_INO
 *
 */
static long swap_inode_boot_loader(struct super_block *sb,
				   struct inode *inode)
{
	handle_t *handle;
	int err;
	struct inode *inode_bl;
	struct ext4_inode_info *ei_bl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
		return -EINVAL;

	if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
		return -EPERM;

	inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
	if (IS_ERR(inode_bl))
		return PTR_ERR(inode_bl);
	ei_bl = EXT4_I(inode_bl);

	filemap_flush(inode->i_mapping);
	filemap_flush(inode_bl->i_mapping);

	/* Protect orig inodes against a truncate and make sure
	 * that only one swap_inode_boot_loader is running. */
	lock_two_nondirectories(inode, inode_bl);

	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(&inode_bl->i_data, 0);

	/* Wait for all existing dio workers */
	ext4_inode_block_unlocked_dio(inode);
	ext4_inode_block_unlocked_dio(inode_bl);
	inode_dio_wait(inode);
	inode_dio_wait(inode_bl);

	handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
	if (IS_ERR(handle)) {
		err = -EINVAL;
		goto journal_err_out;
	}

	/* Protect extent tree against block allocations via delalloc */
	ext4_double_down_write_data_sem(inode, inode_bl);

	if (inode_bl->i_nlink == 0) {
		/* this inode has never been used as a BOOT_LOADER */
		set_nlink(inode_bl, 1);
		i_uid_write(inode_bl, 0);
		i_gid_write(inode_bl, 0);
		inode_bl->i_flags = 0;
		ei_bl->i_flags = 0;
		inode_bl->i_version = 1;
		i_size_write(inode_bl, 0);
		inode_bl->i_mode = S_IFREG;
		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
					      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
			ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode_bl);
		} else
			memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
	}

	swap_inode_data(inode, inode_bl);

	inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode);

	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	inode_bl->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

	ext4_discard_preallocations(inode);

	err = ext4_mark_inode_dirty(handle, inode);
	if (err < 0) {
		ext4_warning(inode->i_sb,
			"couldn't mark inode #%lu dirty (err %d)",
			inode->i_ino, err);
		/* Revert all changes: */
		swap_inode_data(inode, inode_bl);
	} else {
		err = ext4_mark_inode_dirty(handle, inode_bl);
		if (err < 0) {
			ext4_warning(inode_bl->i_sb,
				"couldn't mark inode #%lu dirty (err %d)",
				inode_bl->i_ino, err);
			/* Revert all changes: */
			swap_inode_data(inode, inode_bl);
			ext4_mark_inode_dirty(handle, inode);
		}
	}
	ext4_journal_stop(handle);
	ext4_double_up_write_data_sem(inode, inode_bl);

journal_err_out:
	ext4_inode_resume_unlocked_dio(inode);
	ext4_inode_resume_unlocked_dio(inode_bl);
	unlock_two_nondirectories(inode, inode_bl);
	iput(inode_bl);
	return err;
}
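
/*
 * Userspace sketch (illustrative): to install a new boot loader image,
 * open the prepared file and swap it with inode EXT4_BOOT_LOADER_INO:
 *
 *	int fd = open("/mnt/newboot.img", O_RDWR);
 *	ioctl(fd, EXT4_IOC_SWAP_BOOT);
 *
 * The ioctl takes no argument; the EXT4_IOC_SWAP_BOOT case in
 * ext4_ioctl() below routes it to swap_inode_boot_loader() above.
 */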

long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct super_block *sb = inode->i_sb;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int flags;

	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);

	switch (cmd) {
	case EXT4_IOC_GETFLAGS:
		ext4_get_inode_flags(ei);
		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
		return put_user(flags, (int __user *) arg);
	case EXT4_IOC_SETFLAGS: {
		handle_t *handle = NULL;
		int err, migrate = 0;
		struct ext4_iloc iloc;
		unsigned int oldflags, mask, i;
		unsigned int jflag;

		if (!inode_owner_or_capable(inode))
			return -EACCES;

		if (get_user(flags, (int __user *) arg))
			return -EFAULT;

		err = mnt_want_write_file(filp);
		if (err)
			return err;

		flags = ext4_mask_flags(inode->i_mode, flags);

		err = -EPERM;
		mutex_lock(&inode->i_mutex);
		/* Is it quota file? Do not allow user to mess with it */
		if (IS_NOQUOTA(inode))
			goto flags_out;

		oldflags = ei->i_flags;

		/* The JOURNAL_DATA flag is modifiable only by root */
		jflag = flags & EXT4_JOURNAL_DATA_FL;

		/*
		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
		 * the relevant capability.
		 *
		 * This test looks nicer. Thanks to Pauline Middelink
		 */
		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
			if (!capable(CAP_LINUX_IMMUTABLE))
				goto flags_out;
		}

		/*
		 * The JOURNAL_DATA flag can only be changed by
		 * the relevant capability.
		 */
		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
			if (!capable(CAP_SYS_RESOURCE))
				goto flags_out;
		}
		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
			migrate = 1;

		if (flags & EXT4_EOFBLOCKS_FL) {
			/* we don't support adding EOFBLOCKS flag */
			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
				err = -EOPNOTSUPP;
				goto flags_out;
			}
		} else if (oldflags & EXT4_EOFBLOCKS_FL)
			ext4_truncate(inode);

		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto flags_out;
		}
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err)
			goto flags_err;

		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
			if (!(mask & EXT4_FL_USER_MODIFIABLE))
				continue;
			if (mask & flags)
				ext4_set_inode_flag(inode, i);
			else
				ext4_clear_inode_flag(inode, i);
		}

		ext4_set_inode_flags(inode);
		inode->i_ctime = ext4_current_time(inode);

		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
		ext4_journal_stop(handle);
		if (err)
			goto flags_out;

		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
			err = ext4_change_inode_journal_flag(inode, jflag);
		if (err)
			goto flags_out;
		if (migrate) {
			if (flags & EXT4_EXTENTS_FL)
				err = ext4_ext_migrate(inode);
			else
				err = ext4_ind_migrate(inode);
		}

flags_out:
		mutex_unlock(&inode->i_mutex);
		mnt_drop_write_file(filp);
		return err;
	}
	case EXT4_IOC_GETVERSION:
	case EXT4_IOC_GETVERSION_OLD:
		return put_user(inode->i_generation, (int __user *) arg);
	case EXT4_IOC_SETVERSION:
	case EXT4_IOC_SETVERSION_OLD: {
		handle_t *handle;
		struct ext4_iloc iloc;
		__u32 generation;
		int err;

		if (!inode_owner_or_capable(inode))
			return -EPERM;

		if (ext4_has_metadata_csum(inode->i_sb)) {
			ext4_warning(sb, "Setting inode version is not "
				     "supported with metadata_csum enabled.");
			return -ENOTTY;
		}

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		if (get_user(generation, (int __user *) arg)) {
			err = -EFAULT;
			goto setversion_out;
		}

		mutex_lock(&inode->i_mutex);
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto unlock_out;
		}
		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err == 0) {
			inode->i_ctime = ext4_current_time(inode);
			inode->i_generation = generation;
			err = ext4_mark_iloc_dirty(handle, inode, &iloc);
		}
		ext4_journal_stop(handle);

unlock_out:
		mutex_unlock(&inode->i_mutex);
setversion_out:
		mnt_drop_write_file(filp);
		return err;
	}
	case EXT4_IOC_GROUP_EXTEND: {
		ext4_fsblk_t n_blocks_count;
		int err, err2 = 0;

		err = ext4_resize_begin(sb);
		if (err)
			return err;

		if (get_user(n_blocks_count, (__u32 __user *)arg)) {
			err = -EFAULT;
			goto group_extend_out;
		}

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto group_extend_out;
		}

		err = mnt_want_write_file(filp);
		if (err)
			goto group_extend_out;

		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write_file(filp);
group_extend_out:
		ext4_resize_end(sb);
		return err;
	}

	case EXT4_IOC_MOVE_EXT: {
		struct move_extent me;
		struct fd donor;
		int err;

		if (!(filp->f_mode & FMODE_READ) ||
		    !(filp->f_mode & FMODE_WRITE))
			return -EBADF;

		if (copy_from_user(&me,
			(struct move_extent __user *)arg, sizeof(me)))
			return -EFAULT;
		me.moved_len = 0;

		donor = fdget(me.donor_fd);
		if (!donor.file)
			return -EBADF;

		if (!(donor.file->f_mode & FMODE_WRITE)) {
			err = -EBADF;
			goto mext_out;
		}

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online defrag not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto mext_out;
		}

		err = mnt_want_write_file(filp);
		if (err)
			goto mext_out;

		err = ext4_move_extents(filp, donor.file, me.orig_start,
					me.donor_start, me.len, &me.moved_len);
		mnt_drop_write_file(filp);

		if (copy_to_user((struct move_extent __user *)arg,
				 &me, sizeof(me)))
			err = -EFAULT;
mext_out:
		fdput(donor);
		return err;
	}

	case EXT4_IOC_GROUP_ADD: {
		struct ext4_new_group_data input;
		int err, err2 = 0;

		err = ext4_resize_begin(sb);
		if (err)
			return err;

		if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
				sizeof(input))) {
			err = -EFAULT;
			goto group_add_out;
		}

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto group_add_out;
		}

		err = mnt_want_write_file(filp);
		if (err)
			goto group_add_out;

		err = ext4_group_add(sb, &input);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write_file(filp);
		if (!err && ext4_has_group_desc_csum(sb) &&
		    test_opt(sb, INIT_INODE_TABLE))
			err = ext4_register_li_request(sb, input.group);
group_add_out:
		ext4_resize_end(sb);
		return err;
	}

	case EXT4_IOC_MIGRATE:
	{
		int err;
		if (!inode_owner_or_capable(inode))
			return -EACCES;

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		/*
		 * The inode mutex prevents writes and truncates on the
		 * file; reads still go through.  We take i_data_sem in
		 * ext4_ext_swap_inode_data before we switch the
		 * inode format to prevent reads.
		 */
		mutex_lock(&(inode->i_mutex));
		err = ext4_ext_migrate(inode);
		mutex_unlock(&(inode->i_mutex));
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_ALLOC_DA_BLKS:
	{
		int err;
		if (!inode_owner_or_capable(inode))
			return -EACCES;

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		err = ext4_alloc_da_blocks(inode);
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_SWAP_BOOT:
	{
		int err;
		if (!(filp->f_mode & FMODE_WRITE))
			return -EBADF;
		err = mnt_want_write_file(filp);
		if (err)
			return err;
		err = swap_inode_boot_loader(sb, inode);
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_RESIZE_FS: {
		ext4_fsblk_t n_blocks_count;
		int err = 0, err2 = 0;
		ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not (yet) supported with bigalloc");
			return -EOPNOTSUPP;
		}

		if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
				   sizeof(__u64))) {
			return -EFAULT;
		}

		err = ext4_resize_begin(sb);
		if (err)
			return err;

		err = mnt_want_write_file(filp);
		if (err)
			goto resizefs_out;

		err = ext4_resize_fs(sb, n_blocks_count);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write_file(filp);
		if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
		    ext4_has_group_desc_csum(sb) &&
		    test_opt(sb, INIT_INODE_TABLE))
			err = ext4_register_li_request(sb, o_group);

resizefs_out:
		ext4_resize_end(sb);
		return err;
	}

	case FIDTRIM:
	case FITRIM:
	{
		struct request_queue *q = bdev_get_queue(sb->s_bdev);
		struct fstrim_range range;
		int ret = 0;
		int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;

		if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
			return -EOPNOTSUPP;
		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
		    sizeof(range)))
			return -EFAULT;

		range.minlen = max((unsigned int)range.minlen,
				   q->limits.discard_granularity);
		ret = ext4_trim_fs(sb, &range, flags);
		if (ret < 0)
			return ret;

		if (copy_to_user((struct fstrim_range __user *)arg, &range,
		    sizeof(range)))
			return -EFAULT;

		return 0;
	}
	case EXT4_IOC_PRECACHE_EXTENTS:
		return ext4_ext_precache(inode);

	case FS_IOC_INVAL_MAPPING:
	{
		return invalidate_mapping_pages(inode->i_mapping, 0, -1);
	}

	default:
		return -ENOTTY;
	}
}

#ifdef CONFIG_COMPAT
long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/* These are just misnamed, they actually get/put from/to user an int */
	switch (cmd) {
	case EXT4_IOC32_GETFLAGS:
		cmd = EXT4_IOC_GETFLAGS;
		break;
	case EXT4_IOC32_SETFLAGS:
		cmd = EXT4_IOC_SETFLAGS;
		break;
	case EXT4_IOC32_GETVERSION:
		cmd = EXT4_IOC_GETVERSION;
		break;
	case EXT4_IOC32_SETVERSION:
		cmd = EXT4_IOC_SETVERSION;
		break;
	case EXT4_IOC32_GROUP_EXTEND:
		cmd = EXT4_IOC_GROUP_EXTEND;
		break;
	case EXT4_IOC32_GETVERSION_OLD:
		cmd = EXT4_IOC_GETVERSION_OLD;
		break;
	case EXT4_IOC32_SETVERSION_OLD:
		cmd = EXT4_IOC_SETVERSION_OLD;
		break;
	case EXT4_IOC32_GETRSVSZ:
		cmd = EXT4_IOC_GETRSVSZ;
		break;
	case EXT4_IOC32_SETRSVSZ:
		cmd = EXT4_IOC_SETRSVSZ;
		break;
	case EXT4_IOC32_GROUP_ADD: {
		struct compat_ext4_new_group_input __user *uinput;
		struct ext4_new_group_input input;
		mm_segment_t old_fs;
		int err;

		uinput = compat_ptr(arg);
		err = get_user(input.group, &uinput->group);
		err |= get_user(input.block_bitmap, &uinput->block_bitmap);
		err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
		err |= get_user(input.inode_table, &uinput->inode_table);
		err |= get_user(input.blocks_count, &uinput->blocks_count);
		err |= get_user(input.reserved_blocks,
				&uinput->reserved_blocks);
		if (err)
			return -EFAULT;
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
				 (unsigned long) &input);
		set_fs(old_fs);
		return err;
	}
	case EXT4_IOC_MOVE_EXT:
	case FITRIM:
	case EXT4_IOC_RESIZE_FS:
	case EXT4_IOC_PRECACHE_EXTENTS:
		break;
	default:
		return -ENOIOCTLCMD;
	}
	return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
5266
fs/ext4/mballoc.c
Normal file
File diff suppressed because it is too large
215
fs/ext4/mballoc.h
Normal file
@@ -0,0 +1,215 @@
/*
 *  fs/ext4/mballoc.h
 *
 *  Written by: Alex Tomas <alex@clusterfs.com>
 *
 */
#ifndef _EXT4_MBALLOC_H
#define _EXT4_MBALLOC_H

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "ext4_jbd2.h"
#include "ext4.h"

/*
 * with AGGRESSIVE_CHECK allocator runs consistency checks over
 * structures. these checks slow things down a lot
 */
#define AGGRESSIVE_CHECK__

/*
 * with DOUBLE_CHECK defined mballoc creates persistent in-core
 * bitmaps, maintains and uses them to check for double allocations
 */
#define DOUBLE_CHECK__

/*
 */
#ifdef CONFIG_EXT4_DEBUG
extern ushort ext4_mballoc_debug;

#define mb_debug(n, fmt, a...)	                                        \
	do {								\
		if ((n) <= ext4_mballoc_debug) {		        \
			printk(KERN_DEBUG "(%s, %d): %s: ",		\
			       __FILE__, __LINE__, __func__);		\
			printk(fmt, ## a);				\
		}							\
	} while (0)
#else
#define mb_debug(n, fmt, a...)	no_printk(fmt, ## a)
#endif
|
||||
|
||||
#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
|
||||
#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
|
||||
|
||||
/*
|
||||
* How long mballoc can look for a best extent (in found extents)
|
||||
*/
|
||||
#define MB_DEFAULT_MAX_TO_SCAN 200
|
||||
|
||||
/*
|
||||
* How long mballoc must look for a best extent
|
||||
*/
|
||||
#define MB_DEFAULT_MIN_TO_SCAN 10
|
||||
|
||||
/*
|
||||
* with 'ext4_mb_stats' allocator will collect stats that will be
|
||||
* shown at umount. The collecting costs though!
|
||||
*/
|
||||
#define MB_DEFAULT_STATS 0
|
||||
|
||||
/*
|
||||
* files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
|
||||
* by the stream allocator, which purpose is to pack requests
|
||||
* as close each to other as possible to produce smooth I/O traffic
|
||||
* We use locality group prealloc space for stream request.
|
||||
* We can tune the same via /proc/fs/ext4/<parition>/stream_req
|
||||
*/
|
||||
#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
|
||||
|
||||
/*
|
||||
* for which requests use 2^N search using buddies
|
||||
*/
|
||||
#define MB_DEFAULT_ORDER2_REQS 2
|
||||
|
||||
/*
|
||||
* default group prealloc size 512 blocks
|
||||
*/
|
||||
#define MB_DEFAULT_GROUP_PREALLOC 512
|
||||
|
||||
|
||||
struct ext4_free_data {
|
||||
/* MUST be the first member */
|
||||
struct ext4_journal_cb_entry efd_jce;
|
||||
|
||||
/* ext4_free_data private data starts from here */
|
||||
|
||||
/* this links the free block information from group_info */
|
||||
struct rb_node efd_node;
|
||||
|
||||
/* group which free block extent belongs */
|
||||
ext4_group_t efd_group;
|
||||
|
||||
/* free block extent */
|
||||
ext4_grpblk_t efd_start_cluster;
|
||||
ext4_grpblk_t efd_count;
|
||||
|
||||
/* transaction which freed this extent */
|
||||
tid_t efd_tid;
|
||||
};
|
||||
|
||||
struct ext4_prealloc_space {
|
||||
struct list_head pa_inode_list;
|
||||
struct list_head pa_group_list;
|
||||
union {
|
||||
struct list_head pa_tmp_list;
|
||||
struct rcu_head pa_rcu;
|
||||
} u;
|
||||
spinlock_t pa_lock;
|
||||
atomic_t pa_count;
|
||||
unsigned pa_deleted;
|
||||
ext4_fsblk_t pa_pstart; /* phys. block */
|
||||
ext4_lblk_t pa_lstart; /* log. block */
|
||||
ext4_grpblk_t pa_len; /* len of preallocated chunk */
|
||||
ext4_grpblk_t pa_free; /* how many blocks are free */
|
||||
unsigned short pa_type; /* pa type. inode or group */
|
||||
spinlock_t *pa_obj_lock;
|
||||
struct inode *pa_inode; /* hack, for history only */
|
||||
};
|
||||
|
||||
enum {
|
||||
MB_INODE_PA = 0,
|
||||
MB_GROUP_PA = 1
|
||||
};
|
||||
|
||||
struct ext4_free_extent {
|
||||
ext4_lblk_t fe_logical;
|
||||
ext4_grpblk_t fe_start; /* In cluster units */
|
||||
ext4_group_t fe_group;
|
||||
ext4_grpblk_t fe_len; /* In cluster units */
|
||||
};
|
||||
|
||||
/*
|
||||
* Locality group:
|
||||
* we try to group all related changes together
|
||||
* so that writeback can flush/allocate them together as well
|
||||
* Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
|
||||
* (512). We store prealloc space into the hash based on the pa_free blocks
|
||||
* order value.ie, fls(pa_free)-1;
|
||||
*/
|
||||
#define PREALLOC_TB_SIZE 10
|
||||
struct ext4_locality_group {
|
||||
/* for allocator */
|
||||
/* to serialize allocates */
|
||||
struct mutex lg_mutex;
|
||||
/* list of preallocations */
|
||||
struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
|
||||
spinlock_t lg_prealloc_lock;
|
||||
};
|
||||
|
||||
struct ext4_allocation_context {
|
||||
struct inode *ac_inode;
|
||||
struct super_block *ac_sb;
|
||||
|
||||
/* original request */
|
||||
struct ext4_free_extent ac_o_ex;
|
||||
|
||||
/* goal request (normalized ac_o_ex) */
|
||||
struct ext4_free_extent ac_g_ex;
|
||||
|
||||
/* the best found extent */
|
||||
struct ext4_free_extent ac_b_ex;
|
||||
|
||||
/* copy of the best found extent taken before preallocation efforts */
|
||||
struct ext4_free_extent ac_f_ex;
|
||||
|
||||
__u16 ac_groups_scanned;
|
||||
__u16 ac_found;
|
||||
__u16 ac_tail;
|
||||
__u16 ac_buddy;
|
||||
__u16 ac_flags; /* allocation hints */
|
||||
__u8 ac_status;
|
||||
__u8 ac_criteria;
|
||||
__u8 ac_2order; /* if request is to allocate 2^N blocks and
|
||||
* N > 0, the field stores N, otherwise 0 */
|
||||
__u8 ac_op; /* operation, for history only */
|
||||
struct page *ac_bitmap_page;
|
||||
struct page *ac_buddy_page;
|
||||
struct ext4_prealloc_space *ac_pa;
|
||||
struct ext4_locality_group *ac_lg;
|
||||
};
|
||||
|
||||
#define AC_STATUS_CONTINUE 1
|
||||
#define AC_STATUS_FOUND 2
|
||||
#define AC_STATUS_BREAK 3
|
||||
|
||||
struct ext4_buddy {
|
||||
struct page *bd_buddy_page;
|
||||
void *bd_buddy;
|
||||
struct page *bd_bitmap_page;
|
||||
void *bd_bitmap;
|
||||
struct ext4_group_info *bd_info;
|
||||
struct super_block *bd_sb;
|
||||
__u16 bd_blkbits;
|
||||
ext4_group_t bd_group;
|
||||
};
|
||||
|
||||
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
|
||||
struct ext4_free_extent *fex)
|
||||
{
|
||||
return ext4_group_first_block_no(sb, fex->fe_group) +
|
||||
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
|
||||
}
|
||||
#endif
|
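The locality-group comment above hashes a prealloc space by the order of its free-block count, fls(pa_free) - 1, into one of PREALLOC_TB_SIZE buckets. A small user-space sketch of that bucket computation follows; fls_emul() and lg_bucket() are hypothetical helpers written for illustration (the kernel's fls() is not available in user space), and the clamp to the table size is an assumption made to keep the index in range.

#include <stdio.h>

/* Emulate the kernel's fls(): index of the highest set bit, 1-based. */
static int fls_emul(unsigned int x)
{
	int r = 0;
	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

/* Hypothetical helper: which lg_prealloc_list bucket a prealloc space
 * with 'pa_free' free blocks would land in, clamped to the table size. */
static int lg_bucket(unsigned int pa_free, int table_size)
{
	int order = fls_emul(pa_free) - 1;	/* fls(pa_free) - 1 */
	return order < table_size ? order : table_size - 1;
}

int main(void)
{
	/* a full 512-block group prealloc lands in bucket fls(512)-1 = 9 */
	printf("pa_free=512 -> bucket %d\n", lg_bucket(512, 10));
	printf("pa_free=16  -> bucket %d\n", lg_bucket(16, 10));
	return 0;
}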
672
fs/ext4/migrate.c
Normal file
@ -0,0 +1,672 @@
/*
 * Copyright IBM Corporation, 2007
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"

/*
 * The contiguous blocks details which can be
 * represented by a single extent
 */
struct migrate_struct {
	ext4_lblk_t first_block, last_block, curr_block;
	ext4_fsblk_t first_pblock, last_pblock;
};

static int finish_range(handle_t *handle, struct inode *inode,
				struct migrate_struct *lb)
{
	int retval = 0, needed;
	struct ext4_extent newext;
	struct ext4_ext_path *path;
	if (lb->first_pblock == 0)
		return 0;

	/* Add the extent to the temp inode */
	newext.ee_block = cpu_to_le32(lb->first_block);
	newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
	ext4_ext_store_pblock(&newext, lb->first_pblock);
	/* Locking only for convenience since we are operating on the temp inode */
	down_write(&EXT4_I(inode)->i_data_sem);
	path = ext4_find_extent(inode, lb->first_block, NULL, 0);
	if (IS_ERR(path)) {
		retval = PTR_ERR(path);
		path = NULL;
		goto err_out;
	}

	/*
	 * Calculate the credits needed to insert this extent.
	 * Since we are doing this in a loop we may accumulate extra
	 * credits; below we try not to accumulate too many of them
	 * by restarting the journal.
	 */
	needed = ext4_ext_calc_credits_for_single_extent(inode,
		    lb->last_block - lb->first_block + 1, path);

	/*
	 * Make sure the credits we accumulated are not too high
	 */
	if (needed && ext4_handle_has_enough_credits(handle,
						EXT4_RESERVE_TRANS_BLOCKS)) {
		up_write((&EXT4_I(inode)->i_data_sem));
		retval = ext4_journal_restart(handle, needed);
		down_write((&EXT4_I(inode)->i_data_sem));
		if (retval)
			goto err_out;
	} else if (needed) {
		retval = ext4_journal_extend(handle, needed);
		if (retval) {
			/*
			 * If not able to extend the journal, restart it
			 */
			up_write((&EXT4_I(inode)->i_data_sem));
			retval = ext4_journal_restart(handle, needed);
			down_write((&EXT4_I(inode)->i_data_sem));
			if (retval)
				goto err_out;
		}
	}
	retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
err_out:
	up_write((&EXT4_I(inode)->i_data_sem));
	ext4_ext_drop_refs(path);
	kfree(path);
	lb->first_pblock = 0;
	return retval;
}

static int update_extent_range(handle_t *handle, struct inode *inode,
			       ext4_fsblk_t pblock, struct migrate_struct *lb)
{
	int retval;
	/*
	 * See if we can add on to the existing range (if it exists)
	 */
	if (lb->first_pblock &&
		(lb->last_pblock+1 == pblock) &&
		(lb->last_block+1 == lb->curr_block)) {
		lb->last_pblock = pblock;
		lb->last_block = lb->curr_block;
		lb->curr_block++;
		return 0;
	}
	/*
	 * Start a new range.
	 */
	retval = finish_range(handle, inode, lb);
	lb->first_pblock = lb->last_pblock = pblock;
	lb->first_block = lb->last_block = lb->curr_block;
	lb->curr_block++;
	return retval;
}

static int update_ind_extent_range(handle_t *handle, struct inode *inode,
				   ext4_fsblk_t pblock,
				   struct migrate_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, inode,
						le32_to_cpu(i_data[i]), lb);
			if (retval)
				break;
		} else {
			lb->curr_block++;
		}
	}
	put_bh(bh);
	return retval;
}

static int update_dind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock,
				    struct migrate_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_ind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]), lb);
			if (retval)
				break;
		} else {
			/* Only update the file block number */
			lb->curr_block += max_entries;
		}
	}
	put_bh(bh);
	return retval;
}

static int update_tind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock,
				    struct migrate_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_dind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]), lb);
			if (retval)
				break;
		} else {
			/* Only update the file block number */
			lb->curr_block += max_entries * max_entries;
		}
	}
	put_bh(bh);
	return retval;
}

static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
	int retval = 0, needed;

	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
		return 0;
	/*
	 * We are freeing blocks. During this we touch the
	 * superblock, group descriptor and block bitmap,
	 * so allocate a credit of 3. We may also update
	 * quota (user and group).
	 */
	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);

	if (ext4_journal_extend(handle, needed) != 0)
		retval = ext4_journal_restart(handle, needed);

	return retval;
}

static int free_dind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			extend_credit_for_blkdel(handle, inode);
			ext4_free_blocks(handle, inode, NULL,
					 le32_to_cpu(tmp_idata[i]), 1,
					 EXT4_FREE_BLOCKS_METADATA |
					 EXT4_FREE_BLOCKS_FORGET);
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

static int free_tind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i, retval = 0;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			retval = free_dind_blocks(handle,
					inode, tmp_idata[i]);
			if (retval) {
				put_bh(bh);
				return retval;
			}
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
{
	int retval;

	/* ei->i_data[EXT4_IND_BLOCK] */
	if (i_data[0]) {
		extend_credit_for_blkdel(handle, inode);
		ext4_free_blocks(handle, inode, NULL,
				 le32_to_cpu(i_data[0]), 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	}

	/* ei->i_data[EXT4_DIND_BLOCK] */
	if (i_data[1]) {
		retval = free_dind_blocks(handle, inode, i_data[1]);
		if (retval)
			return retval;
	}

	/* ei->i_data[EXT4_TIND_BLOCK] */
	if (i_data[2]) {
		retval = free_tind_blocks(handle, inode, i_data[2]);
		if (retval)
			return retval;
	}
	return 0;
}

static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
						struct inode *tmp_inode)
{
	int retval;
	__le32	i_data[3];
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);

	/*
	 * One credit accounted for writing the
	 * i_data field of the original inode
	 */
	retval = ext4_journal_extend(handle, 1);
	if (retval) {
		retval = ext4_journal_restart(handle, 1);
		if (retval)
			goto err_out;
	}

	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];

	down_write(&EXT4_I(inode)->i_data_sem);
	/*
	 * If EXT4_STATE_EXT_MIGRATE is cleared, a block allocation
	 * happened after we started the migrate. We need to
	 * fail the migrate.
	 */
	if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
		retval = -EAGAIN;
		up_write(&EXT4_I(inode)->i_data_sem);
		goto err_out;
	} else
		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	/*
	 * We have the extent map built with the tmp inode.
	 * Now copy the i_data across.
	 */
	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));

	/*
	 * Update i_blocks with the new blocks that got
	 * allocated while adding extents for extent index
	 * blocks.
	 *
	 * While converting to extents we need not
	 * update the original inode's i_blocks for extent blocks
	 * via quota APIs. The quota update happened via tmp_inode already.
	 */
	spin_lock(&inode->i_lock);
	inode->i_blocks += tmp_inode->i_blocks;
	spin_unlock(&inode->i_lock);
	up_write(&EXT4_I(inode)->i_data_sem);

	/*
	 * We mark the inode dirty after, because we decrement the
	 * i_blocks when freeing the indirect meta-data blocks
	 */
	retval = free_ind_block(handle, inode, i_data);
	ext4_mark_inode_dirty(handle, inode);

err_out:
	return retval;
}

static int free_ext_idx(handle_t *handle, struct inode *inode,
					struct ext4_extent_idx *ix)
{
	int i, retval = 0;
	ext4_fsblk_t block;
	struct buffer_head *bh;
	struct ext4_extent_header *eh;

	block = ext4_idx_pblock(ix);
	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;

	eh = (struct ext4_extent_header *)bh->b_data;
	if (eh->eh_depth != 0) {
		ix = EXT_FIRST_INDEX(eh);
		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
			retval = free_ext_idx(handle, inode, ix);
			if (retval)
				break;
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, block, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
	return retval;
}

/*
 * Free the extent meta data blocks only
 */
static int free_ext_block(handle_t *handle, struct inode *inode)
{
	int i, retval = 0;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
	struct ext4_extent_idx *ix;
	if (eh->eh_depth == 0)
		/*
		 * No extra blocks allocated for extent meta data
		 */
		return 0;
	ix = EXT_FIRST_INDEX(eh);
	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
		retval = free_ext_idx(handle, inode, ix);
		if (retval)
			return retval;
	}
	return retval;
}

int ext4_ext_migrate(struct inode *inode)
{
	handle_t *handle;
	int retval = 0, i;
	__le32 *i_data;
	struct ext4_inode_info *ei;
	struct inode *tmp_inode = NULL;
	struct migrate_struct lb;
	unsigned long max_entries;
	__u32 goal;
	uid_t owner[2];

	/*
	 * If the filesystem does not support extents, or the inode
	 * already is extent-based, error out.
	 */
	if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
	    (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return -EINVAL;

	if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
		/*
		 * don't migrate fast symlinks
		 */
		return retval;

	/*
	 * Worst case we can touch the allocation bitmaps, a bgd
	 * block, and a block to link in the orphan list. We do need
	 * to worry about credits for modifying the quota inode.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
		4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));

	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		return retval;
	}
	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
	owner[0] = i_uid_read(inode);
	owner[1] = i_gid_read(inode);
	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
				   S_IFREG, NULL, goal, owner);
	if (IS_ERR(tmp_inode)) {
		retval = PTR_ERR(tmp_inode);
		ext4_journal_stop(handle);
		return retval;
	}
	i_size_write(tmp_inode, i_size_read(inode));
	/*
	 * Set the i_nlink to zero so it will be deleted later
	 * when we drop the inode reference.
	 */
	clear_nlink(tmp_inode);

	ext4_ext_tree_init(handle, tmp_inode);
	ext4_orphan_add(handle, tmp_inode);
	ext4_journal_stop(handle);

	/*
	 * start with one credit accounted for
	 * superblock modification.
	 *
	 * For the tmp_inode we already have committed the
	 * transaction that created the inode. Later, as and
	 * when we add extents, we extend the journal.
	 */
	/*
	 * Even though we take i_mutex we can still cause block
	 * allocation via mmap writes to holes. If we have allocated
	 * new blocks we fail the migrate. New block allocation will
	 * clear the EXT4_STATE_EXT_MIGRATE flag. The flag is updated
	 * with i_data_sem held to prevent racing with block
	 * allocation.
	 */
	down_read(&EXT4_I(inode)->i_data_sem);
	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	up_read((&EXT4_I(inode)->i_data_sem));

	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
	if (IS_ERR(handle)) {
		/*
		 * It is impossible to update on-disk structures without
		 * a handle, so just roll back the in-core changes and
		 * leave other work to orphan_list_cleanup().
		 */
		ext4_orphan_del(NULL, tmp_inode);
		retval = PTR_ERR(handle);
		goto out;
	}

	ei = EXT4_I(inode);
	i_data = ei->i_data;
	memset(&lb, 0, sizeof(lb));

	/* 32 bit block address 4 bytes */
	max_entries = inode->i_sb->s_blocksize >> 2;
	for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, tmp_inode,
						le32_to_cpu(i_data[i]), &lb);
			if (retval)
				goto err_out;
		} else
			lb.curr_block++;
	}
	if (i_data[EXT4_IND_BLOCK]) {
		retval = update_ind_extent_range(handle, tmp_inode,
				le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
		if (retval)
			goto err_out;
	} else
		lb.curr_block += max_entries;
	if (i_data[EXT4_DIND_BLOCK]) {
		retval = update_dind_extent_range(handle, tmp_inode,
				le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
		if (retval)
			goto err_out;
	} else
		lb.curr_block += max_entries * max_entries;
	if (i_data[EXT4_TIND_BLOCK]) {
		retval = update_tind_extent_range(handle, tmp_inode,
				le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
		if (retval)
			goto err_out;
	}
	/*
	 * Build the last extent
	 */
	retval = finish_range(handle, tmp_inode, &lb);
err_out:
	if (retval)
		/*
		 * On failure, delete the extent information of the
		 * tmp_inode
		 */
		free_ext_block(handle, tmp_inode);
	else {
		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
		if (retval)
			/*
			 * if we fail to swap inode data, free the extent
			 * details of the tmp inode
			 */
			free_ext_block(handle, tmp_inode);
	}

	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
	if (ext4_journal_extend(handle, 1) != 0)
		ext4_journal_restart(handle, 1);

	/*
	 * Mark the tmp_inode as of size zero
	 */
	i_size_write(tmp_inode, 0);

	/*
	 * set the i_blocks count to zero
	 * so that ext4_delete_inode() does the
	 * right job
	 *
	 * We don't need to take the i_lock because
	 * the inode is not visible to user space.
	 */
	tmp_inode->i_blocks = 0;

	/* Reset the extent details */
	ext4_ext_tree_init(handle, tmp_inode);
	ext4_journal_stop(handle);
out:
	unlock_new_inode(tmp_inode);
	iput(tmp_inode);

	return retval;
}

/*
 * Migrate a simple extent-based inode to use the i_blocks[] array
 */
int ext4_ind_migrate(struct inode *inode)
{
	struct ext4_extent_header	*eh;
	struct ext4_super_block		*es = EXT4_SB(inode->i_sb)->s_es;
	struct ext4_inode_info		*ei = EXT4_I(inode);
	struct ext4_extent		*ex;
	unsigned int			i, len;
	ext4_fsblk_t			blk;
	handle_t			*handle;
	int				ret;

	if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
	    (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return -EINVAL;

	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_RO_COMPAT_BIGALLOC))
		return -EOPNOTSUPP;

	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	down_write(&EXT4_I(inode)->i_data_sem);
	ret = ext4_ext_check_inode(inode);
	if (ret)
		goto errout;

	eh = ext_inode_hdr(inode);
	ex = EXT_FIRST_EXTENT(eh);
	if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS ||
	    eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) {
		ret = -EOPNOTSUPP;
		goto errout;
	}
	if (eh->eh_entries == 0)
		blk = len = 0;
	else {
		len = le16_to_cpu(ex->ee_len);
		blk = ext4_ext_pblock(ex);
		if (len > EXT4_NDIR_BLOCKS) {
			ret = -EOPNOTSUPP;
			goto errout;
		}
	}

	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	memset(ei->i_data, 0, sizeof(ei->i_data));
	for (i = 0; i < len; i++)
		ei->i_data[i] = cpu_to_le32(blk++);
	ext4_mark_inode_dirty(handle, inode);
errout:
	ext4_journal_stop(handle);
	up_write(&EXT4_I(inode)->i_data_sem);
	return ret;
}
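ext4_ext_migrate() above is reached through the EXT4_IOC_MIGRATE ioctl. The sketch below shows how user space would trigger the migration; the _IO('f', 9) encoding matches the EXT4_IOC_MIGRATE definition in fs/ext4/ext4.h of this kernel generation, but treat it as an assumption and verify against your headers before relying on it.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#ifndef EXT4_IOC_MIGRATE
#define EXT4_IOC_MIGRATE _IO('f', 9)	/* assumed value, see lead-in */
#endif

int main(int argc, char **argv)
{
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-ext4>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The ioctl takes no argument; it fails with EINVAL if the
	 * inode is already extent-based. */
	if (ioctl(fd, EXT4_IOC_MIGRATE) < 0)
		perror("EXT4_IOC_MIGRATE");
	close(fd);
	return 0;
}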
393
fs/ext4/mmp.c
Normal file
@ -0,0 +1,393 @@
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/* Checksumming functions */
static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct mmp_struct, mmp_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);

	return cpu_to_le32(csum);
}

static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}

static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
		return;

	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}

/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

	/*
	 * We protect against freezing so that we don't create dirty buffers
	 * on a frozen filesystem.
	 */
	sb_start_write(sb);
	ext4_mmp_csum_set(sb, mmp);
	mark_buffer_dirty(bh);
	lock_buffer(bh);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
	wait_on_buffer(bh);
	sb_end_write(sb);
	if (unlikely(!buffer_uptodate(bh)))
		return 1;

	return 0;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
			  ext4_fsblk_t mmp_block)
{
	struct mmp_struct *mmp;

	if (*bh)
		clear_buffer_uptodate(*bh);

	/* This would be sb_bread(sb, mmp_block), except we need to be sure
	 * that the MD RAID device cache has been bypassed, and that the read
	 * is not blocked in the elevator. */
	if (!*bh)
		*bh = sb_getblk(sb, mmp_block);
	if (!*bh)
		return -ENOMEM;
	if (*bh) {
		get_bh(*bh);
		lock_buffer(*bh);
		(*bh)->b_end_io = end_buffer_read_sync;
		submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
		wait_on_buffer(*bh);
		if (!buffer_uptodate(*bh)) {
			brelse(*bh);
			*bh = NULL;
		}
	}
	if (unlikely(!*bh)) {
		ext4_warning(sb, "Error while reading MMP block %llu",
			     mmp_block);
		return -EIO;
	}

	mmp = (struct mmp_struct *)((*bh)->b_data);
	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
	    !ext4_mmp_csum_verify(sb, mmp))
		return -EINVAL;

	return 0;
}

/*
 * Dump as much information as possible to help the admin.
 */
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
		    const char *function, unsigned int line, const char *msg)
{
	__ext4_warning(sb, function, line, msg);
	__ext4_warning(sb, function, line,
		       "MMP failure info: last update time: %llu, last update "
		       "node: %s, last update device: %s\n",
		       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
		       mmp->mmp_nodename, mmp->mmp_bdevname);
}

/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
	struct super_block *sb = ((struct mmpd_data *) data)->sb;
	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct mmp_struct *mmp;
	ext4_fsblk_t mmp_block;
	u32 seq = 0;
	unsigned long failed_writes = 0;
	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned mmp_check_interval;
	unsigned long last_update_time;
	unsigned long diff;
	int retval;

	mmp_block = le64_to_cpu(es->s_mmp_block);
	mmp = (struct mmp_struct *)(bh->b_data);
	mmp->mmp_time = cpu_to_le64(get_seconds());
	/*
	 * Start with the higher mmp_check_interval and reduce it if
	 * the MMP block is being updated on time.
	 */
	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
				 EXT4_MMP_MIN_CHECK_INTERVAL);
	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	bdevname(bh->b_bdev, mmp->mmp_bdevname);

	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop()) {
		if (++seq > EXT4_MMP_SEQ_MAX)
			seq = 1;

		mmp->mmp_seq = cpu_to_le32(seq);
		mmp->mmp_time = cpu_to_le64(get_seconds());
		last_update_time = jiffies;

		retval = write_mmp_block(sb, bh);
		/*
		 * Don't spew too many error messages. Print one every
		 * (s_mmp_update_interval * 60) seconds.
		 */
		if (retval) {
			if ((failed_writes % 60) == 0)
				ext4_error(sb, "Error writing to MMP block");
			failed_writes++;
		}

		if (!(le32_to_cpu(es->s_feature_incompat) &
		    EXT4_FEATURE_INCOMPAT_MMP)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		if (sb->s_flags & MS_RDONLY) {
			ext4_warning(sb, "kmmpd being stopped since filesystem "
				     "has been remounted as readonly.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		diff = jiffies - last_update_time;
		if (diff < mmp_update_interval * HZ)
			schedule_timeout_interruptible(mmp_update_interval *
						       HZ - diff);

		/*
		 * We need to make sure that more than mmp_check_interval
		 * seconds have not passed since writing. If that has happened
		 * we need to check if the MMP block is as we left it.
		 */
		diff = jiffies - last_update_time;
		if (diff > mmp_check_interval * HZ) {
			struct buffer_head *bh_check = NULL;
			struct mmp_struct *mmp_check;

			retval = read_mmp_block(sb, &bh_check, mmp_block);
			if (retval) {
				ext4_error(sb, "error reading MMP data: %d",
					   retval);

				EXT4_SB(sb)->s_mmp_tsk = NULL;
				goto failed;
			}

			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
				   sizeof(mmp->mmp_nodename))) {
				dump_mmp_msg(sb, mmp_check,
					     "Error while updating MMP info. "
					     "The filesystem seems to have been"
					     " multiply mounted.");
				ext4_error(sb, "abort");
				goto failed;
			}
			put_bh(bh_check);
		}

		/*
		 * Adjust the mmp_check_interval depending on how much time
		 * it took for the MMP block to be written.
		 */
		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
					     EXT4_MMP_MAX_CHECK_INTERVAL),
					 EXT4_MMP_MIN_CHECK_INTERVAL);
		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	}

	/*
	 * Unmount seems to be clean.
	 */
	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
	mmp->mmp_time = cpu_to_le64(get_seconds());

	retval = write_mmp_block(sb, bh);

failed:
	kfree(data);
	brelse(bh);
	return retval;
}

/*
 * Get a random new sequence number but make sure it is not greater than
 * EXT4_MMP_SEQ_MAX.
 */
static unsigned int mmp_new_seq(void)
{
	u32 new_seq;

	do {
		new_seq = prandom_u32();
	} while (new_seq > EXT4_MMP_SEQ_MAX);

	return new_seq;
}

/*
 * Protect the filesystem from being mounted more than once.
 */
int ext4_multi_mount_protect(struct super_block *sb,
				    ext4_fsblk_t mmp_block)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *bh = NULL;
	struct mmp_struct *mmp = NULL;
	struct mmpd_data *mmpd_data;
	u32 seq;
	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned int wait_time = 0;
	int retval;

	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
	    mmp_block >= ext4_blocks_count(es)) {
		ext4_warning(sb, "Invalid MMP block in superblock");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;

	mmp = (struct mmp_struct *)(bh->b_data);

	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

	/*
	 * If check_interval in the MMP block is larger, use that instead of
	 * update_interval from the superblock.
	 */
	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

	seq = le32_to_cpu(mmp->mmp_seq);
	if (seq == EXT4_MMP_SEQ_CLEAN)
		goto skip;

	if (seq == EXT4_MMP_SEQ_FSCK) {
		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
		goto failed;
	}

	wait_time = min(mmp_check_interval * 2 + 1,
			mmp_check_interval + 60);

	/* Print MMP interval if more than 20 secs. */
	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
		ext4_warning(sb, "MMP interval %u higher than expected, please"
			     " wait.\n", wait_time * 2);

	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

skip:
	/*
	 * write a new random sequence number.
	 */
	seq = mmp_new_seq();
	mmp->mmp_seq = cpu_to_le32(seq);

	retval = write_mmp_block(sb, bh);
	if (retval)
		goto failed;

	/*
	 * wait for the MMP interval and check mmp_seq.
	 */
	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

	mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
	if (!mmpd_data) {
		ext4_warning(sb, "not enough memory for mmpd_data");
		goto failed;
	}
	mmpd_data->sb = sb;
	mmpd_data->bh = bh;

	/*
	 * Start a kernel thread to update the MMP block periodically.
	 */
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
					     bdevname(bh->b_bdev,
						      mmp->mmp_bdevname));
	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
		EXT4_SB(sb)->s_mmp_tsk = NULL;
		kfree(mmpd_data);
		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
			     sb->s_id);
		goto failed;
	}

	return 0;

failed:
	brelse(bh);
	return 1;
}
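A condensed sketch of the mount-time handshake implemented by ext4_multi_mount_protect() above: if the on-disk sequence is not clean, wait and re-read to see whether another node is updating it; then stamp a random sequence, wait one more interval, and re-read to confirm no other node overwrote it. The mmp_read()/mmp_write() helpers and the in-memory "disk" are hypothetical stand-ins, and the 0xFF4D4D50 clean-sequence constant is assumed to match EXT4_MMP_SEQ_CLEAN in ext4.h.

#include <stdio.h>
#include <stdlib.h>

#define MMP_SEQ_CLEAN 0xFF4D4D50U	/* assumed EXT4_MMP_SEQ_CLEAN value */

static unsigned int disk_seq = MMP_SEQ_CLEAN;	/* fake on-disk mmp_seq */

static unsigned int mmp_read(void)    { return disk_seq; }
static void mmp_write(unsigned int s) { disk_seq = s; }
static void wait_interval(void)       { /* schedule_timeout_interruptible() here */ }

/* Returns 0 when the mount may proceed, 1 when another node owns the fs. */
static int multi_mount_protect(void)
{
	unsigned int seq = mmp_read();

	if (seq != MMP_SEQ_CLEAN) {
		wait_interval();		/* wait one check interval */
		if (mmp_read() != seq)
			return 1;		/* someone else is updating */
	}
	seq = (unsigned int)rand();		/* mmp_new_seq() analogue */
	mmp_write(seq);
	wait_interval();			/* give other nodes time to object */
	if (mmp_read() != seq)
		return 1;			/* lost the race */
	return 0;				/* now start the kmmpd updater */
}

int main(void)
{
	printf("mount %s\n", multi_mount_protect() ? "denied" : "allowed");
	return 0;
}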
684
fs/ext4/move_extent.c
Normal file
@ -0,0 +1,684 @@
/*
 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
 *            Akira Fujita <a-fujita@rs.jp.nec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "ext4_extents.h"

/**
 * get_ext_path - Find an extent path for the designated logical block number.
 *
 * @inode:	an inode which is searched
 * @lblock:	logical block number to find an extent path for
 * @ppath:	pointer to an extent path pointer (for output)
 *
 * ext4_find_extent wrapper. Return 0 on success, or a negative error value
 * on failure.
 */
static inline int
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
		struct ext4_ext_path **ppath)
{
	struct ext4_ext_path *path;

	path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);
	if (path[ext_depth(inode)].p_ext == NULL) {
		ext4_ext_drop_refs(path);
		kfree(path);
		*ppath = NULL;
		return -ENODATA;
	}
	*ppath = path;
	return 0;
}

/**
 * ext4_double_down_write_data_sem - Acquire two inodes' write lock
 *                                   of i_data_sem
 *
 * Acquire write lock of i_data_sem of the two inodes
 */
void
ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
{
	if (first < second) {
		down_write(&EXT4_I(first)->i_data_sem);
		down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
	} else {
		down_write(&EXT4_I(second)->i_data_sem);
		down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
	}
}

/**
 * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
 *
 * @orig_inode:		original inode structure, its lock is released first
 * @donor_inode:	donor inode structure, its lock is released second
 * Release write lock of i_data_sem of two inodes (orig and donor).
 */
void
ext4_double_up_write_data_sem(struct inode *orig_inode,
			      struct inode *donor_inode)
{
	up_write(&EXT4_I(orig_inode)->i_data_sem);
	up_write(&EXT4_I(donor_inode)->i_data_sem);
}

/**
 * mext_check_coverage - Check that all extents in the range have the same type
 *
 * @inode:	inode in question
 * @from:	block offset of inode
 * @count:	block count to be checked
 * @unwritten:	extents expected to be unwritten
 * @err:	pointer to save error value
 *
 * Return 1 if all extents in the range have the expected type, and zero
 * otherwise.
 */
static int
mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
		    int unwritten, int *err)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ext;
	int ret = 0;
	ext4_lblk_t last = from + count;
	while (from < last) {
		*err = get_ext_path(inode, from, &path);
		if (*err)
			goto out;
		ext = path[ext_depth(inode)].p_ext;
		if (unwritten != ext4_ext_is_unwritten(ext))
			goto out;
		from += ext4_ext_get_actual_len(ext);
		ext4_ext_drop_refs(path);
	}
	ret = 1;
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	return ret;
}

/**
 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
 *
 * @inode1:	the inode structure
 * @inode2:	the inode structure
 * @index1:	page index
 * @index2:	page index
 * @page:	result page vector
 *
 * Grab two locked pages for the inodes, in inode order.
 */
static int
mext_page_double_lock(struct inode *inode1, struct inode *inode2,
		      pgoff_t index1, pgoff_t index2, struct page *page[2])
{
	struct address_space *mapping[2];
	unsigned fl = AOP_FLAG_NOFS;

	BUG_ON(!inode1 || !inode2);
	if (inode1 < inode2) {
		mapping[0] = inode1->i_mapping;
		mapping[1] = inode2->i_mapping;
	} else {
		pgoff_t tmp = index1;
		index1 = index2;
		index2 = tmp;
		mapping[0] = inode2->i_mapping;
		mapping[1] = inode1->i_mapping;
	}

	page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
	if (!page[0])
		return -ENOMEM;

	page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
	if (!page[1]) {
		unlock_page(page[0]);
		page_cache_release(page[0]);
		return -ENOMEM;
	}
	/*
	 * grab_cache_page_write_begin() may not wait on the page's writeback
	 * if the BDI does not demand it. But it is reasonable to be very
	 * conservative here and explicitly wait on the page's writeback.
	 */
	wait_on_page_writeback(page[0]);
	wait_on_page_writeback(page[1]);
	if (inode1 > inode2) {
		struct page *tmp;
		tmp = page[0];
		page[0] = page[1];
		page[1] = tmp;
	}
	return 0;
}

/* Force page buffers uptodate w/o dropping the page's lock */
static int
mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	sector_t block;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, block_start, block_end;
	int i, err, nr = 0, partial = 0;
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	if (PageUptodate(page))
		return 0;

	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	head = page_buffers(page);
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	for (bh = head, block_start = 0; bh != head || !block_start;
	     block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (!buffer_mapped(bh)) {
			err = ext4_get_block(inode, block, bh, 0);
			if (err) {
				SetPageError(page);
				return err;
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, block_start, blocksize);
				set_buffer_uptodate(bh);
				continue;
			}
		}
		BUG_ON(nr >= MAX_BUF_PER_PAGE);
		arr[nr++] = bh;
	}
	/* No io required */
	if (!nr)
		goto out;

	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (!bh_uptodate_or_lock(bh)) {
			err = bh_submit_read(bh);
			if (err)
				return err;
		}
	}
out:
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

/**
 * move_extent_per_page - Move extent data per page
 *
 * @o_filp:			file structure of the original file
 * @donor_inode:		donor inode
 * @orig_page_offset:		page index on the original file
 * @donor_page_offset:		page index on the donor file
 * @data_offset_in_page:	block index where data swapping starts
 * @block_len_in_page:		the number of blocks to be swapped
 * @unwritten:			whether the orig extent is unwritten
 * @err:			pointer to save return value
 *
 * Save the data in the original inode blocks and replace the original inode
 * extents with the donor inode extents by calling ext4_swap_extents().
 * Finally, write out the saved data in the new original inode blocks. Return
 * the replaced block count.
 */
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
		     int data_offset_in_page,
		     int block_len_in_page, int unwritten, int *err)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct page *pagep[2] = {NULL, NULL};
	handle_t *handle;
	ext4_lblk_t orig_blk_offset, donor_blk_offset;
	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
	unsigned int w_flags = 0;
	unsigned int tmp_data_size, data_size, replaced_size;
	int err2, jblocks, retries = 0;
	int replaced_count = 0;
	int from = data_offset_in_page << orig_inode->i_blkbits;
	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;

	/*
	 * It needs twice the amount of ordinary journal buffers because
	 * inode and donor_inode may each change different metadata blocks.
	 */
again:
	*err = 0;
	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
	if (IS_ERR(handle)) {
		*err = PTR_ERR(handle);
		return 0;
	}

	if (segment_eq(get_fs(), KERNEL_DS))
		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;

	orig_blk_offset = orig_page_offset * blocks_per_page +
		data_offset_in_page;

	donor_blk_offset = donor_page_offset * blocks_per_page +
		data_offset_in_page;

	/* Calculate data_size */
	if ((orig_blk_offset + block_len_in_page - 1) ==
	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
		/* Replace the last block */
		tmp_data_size = orig_inode->i_size & (blocksize - 1);
		/*
		 * If tmp_data_size equals zero, the data size is a multiple
		 * of blocksize, so we set the appropriate value.
		 */
		if (tmp_data_size == 0)
			tmp_data_size = blocksize;

		data_size = tmp_data_size +
			((block_len_in_page - 1) << orig_inode->i_blkbits);
	} else
		data_size = block_len_in_page << orig_inode->i_blkbits;

	replaced_size = data_size;

	*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
				     donor_page_offset, pagep);
	if (unlikely(*err < 0))
		goto stop_journal;
	/*
	 * If the orig extent was unwritten it can become initialized
	 * at any time after i_data_sem was dropped. In order to
	 * serialize with delalloc we recheck the extent while we
	 * hold the page's lock. If it is still the case, a data copy
	 * is not necessary; just swap data blocks between orig and donor.
	 */
	if (unwritten) {
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		/* If any of the extents in range became initialized we have
		 * to fall back to data copying */
		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
						block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
						 block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		if (!unwritten) {
			ext4_double_up_write_data_sem(orig_inode, donor_inode);
			goto data_copy;
		}
		if ((page_has_private(pagep[0]) &&
		     !try_to_release_page(pagep[0], 0)) ||
		    (page_has_private(pagep[1]) &&
		     !try_to_release_page(pagep[1], 0))) {
			*err = -EBUSY;
			goto drop_data_sem;
		}
		replaced_count = ext4_swap_extents(handle, orig_inode,
						   donor_inode, orig_blk_offset,
						   donor_blk_offset,
						   block_len_in_page, 1, err);
	drop_data_sem:
		ext4_double_up_write_data_sem(orig_inode, donor_inode);
		goto unlock_pages;
	}
data_copy:
	*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
	if (*err)
		goto unlock_pages;

	/* At this point all buffers in range are uptodate, the old mapping
	 * layout is no longer required; try to drop it now. */
	if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
	    (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
		*err = -EBUSY;
		goto unlock_pages;
	}
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
					   orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 1, err);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (*err) {
		if (replaced_count) {
			block_len_in_page = replaced_count;
			replaced_size =
				block_len_in_page << orig_inode->i_blkbits;
		} else
			goto unlock_pages;
	}
	/* Perform all necessary steps similar to write_begin()/write_end(),
	 * but keeping in mind that i_size will not change */
	*err = __block_write_begin(pagep[0], from, replaced_size,
				   ext4_get_block);
	if (!*err)
		*err = block_commit_write(pagep[0], from, from + replaced_size);

	if (unlikely(*err < 0))
		goto repair_branches;

	/* Even in the case of data=writeback it is reasonable to pin
	 * the inode to the transaction, to prevent unexpected data loss */
	*err = ext4_jbd2_file_inode(handle, orig_inode);

unlock_pages:
	unlock_page(pagep[0]);
	page_cache_release(pagep[0]);
	unlock_page(pagep[1]);
	page_cache_release(pagep[1]);
stop_journal:
	ext4_journal_stop(handle);
	/* The buffer was busy, probably because it is pinned to a journal
	 * transaction; forcing a transaction commit may help to free it. */
	if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
						      &retries))
		goto again;
	return replaced_count;

repair_branches:
	/*
	 * This should never ever happen!
	 * Extents are swapped already, but we are not able to copy data.
	 * Try to swap extents back to their original places.
	 */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
					   orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 0, &err2);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (replaced_count != block_len_in_page) {
		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
				       "Unable to copy data block,"
				       " data will be lost.");
		*err = -EIO;
	}
	replaced_count = 0;
	goto unlock_pages;
}

/**
 * mext_check_arguments - Check whether move extent can be done
 *
 * @orig_inode:		original inode
 * @donor_inode:	donor inode
 * @orig_start:		logical start offset in block for orig
 * @donor_start:	logical start offset in block for donor
 * @len:		the number of blocks to be moved
 *
 * Check the arguments of ext4_move_extents() to see whether the files can be
 * exchanged with each other.
 * Return 0 on success, or a negative error value on failure.
 */
static int
mext_check_arguments(struct inode *orig_inode,
		     struct inode *donor_inode, __u64 orig_start,
		     __u64 donor_start, __u64 *len)
{
	__u64 orig_eof, donor_eof;
	unsigned int blkbits = orig_inode->i_blkbits;
	unsigned int blocksize = 1 << blkbits;

	orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
	donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;


	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
		ext4_debug("ext4 move extent: suid or sgid is set"
			   " on donor file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
		return -EPERM;

	/* Ext4 move extent does not support swapfiles */
	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
		ext4_debug("ext4 move extent: The argument files should "
			   "not be swapfiles [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EBUSY;
	}

	/* Ext4 move extent supports only extent-based files */
	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
		ext4_debug("ext4 move extent: orig file is not an extents-"
			   "based file [ino:orig %lu]\n", orig_inode->i_ino);
		return -EOPNOTSUPP;
	} else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
		ext4_debug("ext4 move extent: donor file is not an extents-"
			   "based file [ino:donor %lu]\n", donor_inode->i_ino);
		return -EOPNOTSUPP;
	}

	if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
		ext4_debug("ext4 move extent: File size is 0 bytes\n");
		return -EINVAL;
	}

	/* Start offsets should be the same */
	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
		ext4_debug("ext4 move extent: orig and donor's start "
			   "offsets are not aligned [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if ((orig_start >= EXT_MAX_BLOCKS) ||
	    (donor_start >= EXT_MAX_BLOCKS) ||
	    (*len > EXT_MAX_BLOCKS) ||
	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
	    (orig_start + *len >= EXT_MAX_BLOCKS)) {
		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
			   "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}
	if (orig_eof < orig_start + *len - 1)
		*len = orig_eof - orig_start;
	if (donor_eof < donor_start + *len - 1)
		*len = donor_eof - donor_start;
	if (!*len) {
		ext4_debug("ext4 move extent: len should not be 0 "
			   "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
			   donor_inode->i_ino);
		return -EINVAL;
	}

	return 0;
}
/**
|
||||
* ext4_move_extents - Exchange the specified range of a file
|
||||
*
|
||||
* @o_filp: file structure of the original file
|
||||
* @d_filp: file structure of the donor file
|
||||
* @orig_blk: start offset in block for orig
|
||||
* @donor_blk: start offset in block for donor
|
||||
* @len: the number of blocks to be moved
|
||||
* @moved_len: moved block length
|
||||
*
|
||||
* This function returns 0 and moved block length is set in moved_len
|
||||
* if succeed, otherwise returns error value.
|
||||
*
|
||||
*/
|
||||
int
|
||||
ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
|
||||
__u64 donor_blk, __u64 len, __u64 *moved_len)
|
||||
{
|
||||
struct inode *orig_inode = file_inode(o_filp);
|
||||
struct inode *donor_inode = file_inode(d_filp);
|
||||
struct ext4_ext_path *path = NULL;
|
||||
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
|
||||
ext4_lblk_t o_end, o_start = orig_blk;
|
||||
ext4_lblk_t d_start = donor_blk;
|
||||
int ret;
|
||||
|
||||
if (orig_inode->i_sb != donor_inode->i_sb) {
|
||||
ext4_debug("ext4 move extent: The argument files "
|
||||
"should be in same FS [ino:orig %lu, donor %lu]\n",
|
||||
orig_inode->i_ino, donor_inode->i_ino);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* orig and donor should be different inodes */
|
||||
if (orig_inode == donor_inode) {
|
||||
ext4_debug("ext4 move extent: The argument files should not "
|
||||
"be same inode [ino:orig %lu, donor %lu]\n",
|
||||
orig_inode->i_ino, donor_inode->i_ino);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Regular file check */
|
||||
if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
|
||||
ext4_debug("ext4 move extent: The argument files should be "
|
||||
"regular file [ino:orig %lu, donor %lu]\n",
|
||||
orig_inode->i_ino, donor_inode->i_ino);
|
||||
return -EINVAL;
|
||||
}
|
||||
/* TODO: This is non obvious task to swap blocks for inodes with full
|
||||
jornaling enabled */
|
||||
if (ext4_should_journal_data(orig_inode) ||
|
||||
ext4_should_journal_data(donor_inode)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
/* Protect orig and donor inodes against a truncate */
|
||||
lock_two_nondirectories(orig_inode, donor_inode);
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(orig_inode);
|
||||
ext4_inode_block_unlocked_dio(donor_inode);
|
||||
inode_dio_wait(orig_inode);
|
||||
inode_dio_wait(donor_inode);
|
||||
|
||||
/* Protect extent tree against block allocations via delalloc */
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
/* Check the filesystem environment whether move_extent can be done */
|
||||
ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
|
||||
donor_blk, &len);
|
||||
if (ret)
|
||||
goto out;
|
||||
o_end = o_start + len;
|
||||
|
||||
while (o_start < o_end) {
|
||||
struct ext4_extent *ex;
|
||||
ext4_lblk_t cur_blk, next_blk;
|
||||
pgoff_t orig_page_index, donor_page_index;
|
||||
int offset_in_page;
|
||||
int unwritten, cur_len;
|
||||
|
||||
ret = get_ext_path(orig_inode, o_start, &path);
|
||||
if (ret)
|
||||
goto out;
|
||||
ex = path[path->p_depth].p_ext;
|
||||
next_blk = ext4_ext_next_allocated_block(path);
|
||||
cur_blk = le32_to_cpu(ex->ee_block);
|
||||
cur_len = ext4_ext_get_actual_len(ex);
|
||||
/* Check hole before the start pos */
|
||||
if (cur_blk + cur_len - 1 < o_start) {
|
||||
if (next_blk == EXT_MAX_BLOCKS) {
|
||||
o_start = o_end;
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
}
|
||||
d_start += next_blk - o_start;
|
||||
o_start = next_blk;
|
||||
continue;
|
||||
/* Check hole after the start pos */
|
||||
} else if (cur_blk > o_start) {
|
||||
/* Skip hole */
|
||||
d_start += cur_blk - o_start;
|
||||
o_start = cur_blk;
|
||||
/* Extent inside requested range ?*/
|
||||
if (cur_blk >= o_end)
|
||||
goto out;
|
||||
} else { /* in_range(o_start, o_blk, o_len) */
|
||||
cur_len += cur_blk - o_start;
|
||||
}
|
||||
unwritten = ext4_ext_is_unwritten(ex);
|
||||
if (o_end - o_start < cur_len)
|
||||
cur_len = o_end - o_start;
|
||||
|
||||
orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
|
||||
orig_inode->i_blkbits);
|
||||
donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
|
||||
donor_inode->i_blkbits);
|
||||
offset_in_page = o_start % blocks_per_page;
|
||||
if (cur_len > blocks_per_page- offset_in_page)
|
||||
cur_len = blocks_per_page - offset_in_page;
|
||||
/*
|
||||
* Up semaphore to avoid following problems:
|
||||
* a. transaction deadlock among ext4_journal_start,
|
||||
* ->write_begin via pagefault, and jbd2_journal_commit
|
||||
* b. racing with ->readpage, ->write_begin, and ext4_get_block
|
||||
* in move_extent_per_page
|
||||
*/
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
/* Swap original branches with new branches */
|
||||
move_extent_per_page(o_filp, donor_inode,
|
||||
orig_page_index, donor_page_index,
|
||||
offset_in_page, cur_len,
|
||||
unwritten, &ret);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
if (ret < 0)
|
||||
break;
|
||||
o_start += cur_len;
|
||||
d_start += cur_len;
|
||||
}
|
||||
*moved_len = o_start - orig_blk;
|
||||
if (*moved_len > len)
|
||||
*moved_len = len;
|
||||
|
||||
out:
|
||||
if (*moved_len) {
|
||||
ext4_discard_preallocations(orig_inode);
|
||||
ext4_discard_preallocations(donor_inode);
|
||||
}
|
||||
|
||||
ext4_ext_drop_refs(path);
|
||||
kfree(path);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_inode_resume_unlocked_dio(orig_inode);
|
||||
ext4_inode_resume_unlocked_dio(donor_inode);
|
||||
unlock_two_nondirectories(orig_inode, donor_inode);
|
||||
|
||||
return ret;
|
||||
}
|
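Userspace reaches ext4_move_extents() through the EXT4_IOC_MOVE_EXT ioctl; this is how e4defrag drives online defragmentation. A minimal caller sketch, assuming the struct move_extent layout and ioctl number from this tree's fs/ext4/ext4.h (verify them against the headers you actually build with; the file names are illustrative):

/* Sketch: swap the first 16 blocks of "orig" with a donor file.
 * The donor must be open for writing. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

struct move_extent {
        uint32_t reserved;      /* should be zero */
        uint32_t donor_fd;      /* donor file descriptor */
        uint64_t orig_start;    /* logical start offset in blocks, orig */
        uint64_t donor_start;   /* logical start offset in blocks, donor */
        uint64_t len;           /* block count to be moved */
        uint64_t moved_len;     /* filled in by the kernel */
};

#define EXT4_IOC_MOVE_EXT       _IOWR('f', 15, struct move_extent)

int main(void)
{
        int orig = open("orig", O_RDWR);
        int donor = open("donor", O_WRONLY);
        struct move_extent me = {
                .donor_fd = (uint32_t)donor, .orig_start = 0,
                .donor_start = 0, .len = 16,
        };

        if (orig < 0 || donor < 0)
                return 1;
        if (ioctl(orig, EXT4_IOC_MOVE_EXT, &me) < 0)
                perror("EXT4_IOC_MOVE_EXT");
        else
                printf("moved %llu blocks\n",
                       (unsigned long long)me.moved_len);
        close(donor);
        close(orig);
        return 0;
}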
3612
fs/ext4/namei.c
Normal file
File diff suppressed because it is too large
497
fs/ext4/page-io.c
Normal file
@ -0,0 +1,497 @@
/*
 * linux/fs/ext4/page-io.c
 *
 * This contains the new page_io functions for ext4
 *
 * Written by Theodore Ts'o, 2010.
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/ratelimit.h>

#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

static struct kmem_cache *io_end_cachep;

int __init ext4_init_pageio(void)
{
        io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
        if (io_end_cachep == NULL)
                return -ENOMEM;
        return 0;
}

void ext4_exit_pageio(void)
{
        kmem_cache_destroy(io_end_cachep);
}

/*
 * Print a buffer I/O error compatible with the fs/buffer.c one. This
 * provides compatibility with dmesg scrapers that look for a specific
 * buffer I/O error message. We really need a unified error reporting
 * structure to userspace a la Digital Unix's uerf system, but it's
 * probably not going to happen in my lifetime, due to LKML politics...
 */
static void buffer_io_error(struct buffer_head *bh)
{
        char b[BDEVNAME_SIZE];
        printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
                           bdevname(bh->b_bdev, b),
                           (unsigned long long)bh->b_blocknr);
}

static void ext4_finish_bio(struct bio *bio)
{
        int i;
        int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec;

        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                struct buffer_head *bh, *head;
                unsigned bio_start = bvec->bv_offset;
                unsigned bio_end = bio_start + bvec->bv_len;
                unsigned under_io = 0;
                unsigned long flags;

                if (!page)
                        continue;

                if (error) {
                        SetPageError(page);
                        set_bit(AS_EIO, &page->mapping->flags);
                }
                bh = head = page_buffers(page);
                /*
                 * We check all buffers in the page under BH_Uptodate_Lock
                 * to avoid races with other end io clearing async_write flags
                 */
                local_irq_save(flags);
                bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
                do {
                        if (bh_offset(bh) < bio_start ||
                            bh_offset(bh) + bh->b_size > bio_end) {
                                if (buffer_async_write(bh))
                                        under_io++;
                                continue;
                        }
                        clear_buffer_async_write(bh);
                        if (error)
                                buffer_io_error(bh);
                } while ((bh = bh->b_this_page) != head);
                bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
                local_irq_restore(flags);
                if (!under_io)
                        end_page_writeback(page);
        }
}

static void ext4_release_io_end(ext4_io_end_t *io_end)
{
        struct bio *bio, *next_bio;

        BUG_ON(!list_empty(&io_end->list));
        BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
        WARN_ON(io_end->handle);

        if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
                wake_up_all(ext4_ioend_wq(io_end->inode));

        for (bio = io_end->bio; bio; bio = next_bio) {
                next_bio = bio->bi_private;
                ext4_finish_bio(bio);
                bio_put(bio);
        }
        kmem_cache_free(io_end_cachep, io_end);
}

static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
{
        struct inode *inode = io_end->inode;

        io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
        /* Wake up anyone waiting on unwritten extent conversion */
        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
                wake_up_all(ext4_ioend_wq(inode));
}

/*
 * Check a range of space and convert unwritten extents to written. Note that
 * we are protected from truncate touching the same part of the extent tree by
 * the fact that truncate code waits for all DIO to finish (thus exclusion from
 * direct IO is achieved) and also waits for PageWriteback bits. Thus we
 * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
 * completed (happens from ext4_free_ioend()).
 */
static int ext4_end_io(ext4_io_end_t *io)
{
        struct inode *inode = io->inode;
        loff_t offset = io->offset;
        ssize_t size = io->size;
        handle_t *handle = io->handle;
        int ret = 0;

        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
                   "list->prev 0x%p\n",
                   io, inode->i_ino, io->list.next, io->list.prev);

        io->handle = NULL;      /* Following call will use up the handle */
        ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
        if (ret < 0) {
                ext4_msg(inode->i_sb, KERN_EMERG,
                         "failed to convert unwritten extents to written "
                         "extents -- potential data loss! "
                         "(inode %lu, offset %llu, size %zd, error %d)",
                         inode->i_ino, offset, size, ret);
        }
        ext4_clear_io_unwritten_flag(io);
        ext4_release_io_end(io);
        return ret;
}

static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef EXT4FS_DEBUG
        struct list_head *cur, *before, *after;
        ext4_io_end_t *io, *io0, *io1;

        if (list_empty(head))
                return;

        ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
        list_for_each_entry(io, head, list) {
                cur = &io->list;
                before = cur->prev;
                io0 = container_of(before, ext4_io_end_t, list);
                after = cur->next;
                io1 = container_of(after, ext4_io_end_t, list);

                ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
                           io, inode->i_ino, io0, io1);
        }
#endif
}

/* Add the io_end to the per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
        struct ext4_inode_info *ei = EXT4_I(io_end->inode);
        struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
        struct workqueue_struct *wq;
        unsigned long flags;

        /* Only reserved conversions from writeback should enter here */
        WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
        WARN_ON(!io_end->handle && sbi->s_journal);
        spin_lock_irqsave(&ei->i_completed_io_lock, flags);
        wq = sbi->rsv_conversion_wq;
        if (list_empty(&ei->i_rsv_conversion_list))
                queue_work(wq, &ei->i_rsv_conversion_work);
        list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}

static int ext4_do_flush_completed_IO(struct inode *inode,
                                      struct list_head *head)
{
        ext4_io_end_t *io;
        struct list_head unwritten;
        unsigned long flags;
        struct ext4_inode_info *ei = EXT4_I(inode);
        int err, ret = 0;

        spin_lock_irqsave(&ei->i_completed_io_lock, flags);
        dump_completed_IO(inode, head);
        list_replace_init(head, &unwritten);
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

        while (!list_empty(&unwritten)) {
                io = list_entry(unwritten.next, ext4_io_end_t, list);
                BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
                list_del_init(&io->list);

                err = ext4_end_io(io);
                if (unlikely(!ret && err))
                        ret = err;
        }
        return ret;
}

/*
 * Work on completed IO, to convert unwritten extents to extents
 */
void ext4_end_io_rsv_work(struct work_struct *work)
{
        struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
                                                  i_rsv_conversion_work);
        ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
}

ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
        ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
        if (io) {
                atomic_inc(&EXT4_I(inode)->i_ioend_count);
                io->inode = inode;
                INIT_LIST_HEAD(&io->list);
                atomic_set(&io->count, 1);
        }
        return io;
}

void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
        if (atomic_dec_and_test(&io_end->count)) {
                if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
                        ext4_release_io_end(io_end);
                        return;
                }
                ext4_add_complete_io(io_end);
        }
}

int ext4_put_io_end(ext4_io_end_t *io_end)
{
        int err = 0;

        if (atomic_dec_and_test(&io_end->count)) {
                if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
                        err = ext4_convert_unwritten_extents(io_end->handle,
                                                io_end->inode, io_end->offset,
                                                io_end->size);
                        io_end->handle = NULL;
                        ext4_clear_io_unwritten_flag(io_end);
                }
                ext4_release_io_end(io_end);
        }
        return err;
}

ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
        atomic_inc(&io_end->count);
        return io_end;
}

/* BIO completion function for page writeback */
static void ext4_end_bio(struct bio *bio, int error)
{
        ext4_io_end_t *io_end = bio->bi_private;
        sector_t bi_sector = bio->bi_iter.bi_sector;

        BUG_ON(!io_end);
        bio->bi_end_io = NULL;
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;

        if (error) {
                struct inode *inode = io_end->inode;

                ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
                             error, inode->i_ino,
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
                mapping_set_error(inode->i_mapping, error);
        }

        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
                /*
                 * Link bio into list hanging from io_end. We have to do it
                 * atomically as bio completions can be racing against each
                 * other.
                 */
                bio->bi_private = xchg(&io_end->bio, bio);
                ext4_put_io_end_defer(io_end);
        } else {
                /*
                 * Drop io_end reference early. Inode can get freed once
                 * we finish the bio.
                 */
                ext4_put_io_end_defer(io_end);
                ext4_finish_bio(bio);
                bio_put(bio);
        }
}

void ext4_io_submit(struct ext4_io_submit *io)
{
        struct bio *bio = io->io_bio;

        if (bio) {
                bio_get(io->io_bio);
                submit_bio(io->io_op, io->io_bio);
                BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
                bio_put(io->io_bio);
        }
        io->io_bio = NULL;
}

void ext4_io_submit_init(struct ext4_io_submit *io,
                         struct writeback_control *wbc)
{
        io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
        io->io_bio = NULL;
        io->io_end = NULL;
}

static int io_submit_init_bio(struct ext4_io_submit *io,
                              struct buffer_head *bh)
{
        int nvecs = bio_get_nr_vecs(bh->b_bdev);
        struct bio *bio;

        bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
        if (!bio)
                return -ENOMEM;
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio->bi_bdev = bh->b_bdev;
        bio->bi_end_io = ext4_end_bio;
        bio->bi_private = ext4_get_io_end(io->io_end);
        io->io_bio = bio;
        io->io_next_block = bh->b_blocknr;
        return 0;
}

static int io_submit_add_bh(struct ext4_io_submit *io,
                            struct inode *inode,
                            struct buffer_head *bh)
{
        int ret;

        if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
                ext4_io_submit(io);
        }
        if (io->io_bio == NULL) {
                ret = io_submit_init_bio(io, bh);
                if (ret)
                        return ret;
        }
        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
        if (ret != bh->b_size)
                goto submit_and_retry;
        io->io_next_block++;
        return 0;
}

int ext4_bio_write_page(struct ext4_io_submit *io,
                        struct page *page,
                        int len,
                        struct writeback_control *wbc,
                        bool keep_towrite)
{
        struct inode *inode = page->mapping->host;
        unsigned block_start, blocksize;
        struct buffer_head *bh, *head;
        int ret = 0;
        int nr_submitted = 0;

        blocksize = 1 << inode->i_blkbits;

        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));

        if (keep_towrite)
                set_page_writeback_keepwrite(page);
        else
                set_page_writeback(page);
        ClearPageError(page);

        /*
         * Comments copied from block_write_full_page:
         *
         * The page straddles i_size. It must be zeroed out on each and every
         * writepage invocation because it may be mmapped. "A file is mapped
         * in multiples of the page size. For a file that is not a multiple of
         * the page size, the remaining memory is zeroed when mapped, and
         * writes to that region are not written out to the file."
         */
        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);
        /*
         * In the first loop we prepare and mark buffers to submit. We have to
         * mark all buffers in the page before submitting so that
         * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
         * on the first buffer finishes and we are still working on submitting
         * the second buffer.
         */
        bh = head = page_buffers(page);
        do {
                block_start = bh_offset(bh);
                if (block_start >= len) {
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                        continue;
                }
                if (!buffer_dirty(bh) || buffer_delay(bh) ||
                    !buffer_mapped(bh) || buffer_unwritten(bh)) {
                        /* A hole? We can safely clear the dirty bit */
                        if (!buffer_mapped(bh))
                                clear_buffer_dirty(bh);
                        if (io->io_bio)
                                ext4_io_submit(io);
                        continue;
                }
                if (buffer_new(bh)) {
                        clear_buffer_new(bh);
                        unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
                }
                set_buffer_async_write(bh);
        } while ((bh = bh->b_this_page) != head);

        /* Now submit buffers to write */
        bh = head = page_buffers(page);
        do {
                if (!buffer_async_write(bh))
                        continue;
                ret = io_submit_add_bh(io, inode, bh);
                if (ret) {
                        /*
                         * We only get here on ENOMEM. Not much else
                         * we can do but mark the page as dirty, and
                         * better luck next time.
                         */
                        redirty_page_for_writepage(wbc, page);
                        break;
                }
                nr_submitted++;
                clear_buffer_dirty(bh);
        } while ((bh = bh->b_this_page) != head);

        /* Error stopped previous loop? Clean up buffers... */
        if (ret) {
                do {
                        clear_buffer_async_write(bh);
                        bh = bh->b_this_page;
                } while (bh != head);
        }
        unlock_page(page);
        /* Nothing submitted - we have to end page writeback */
        if (!nr_submitted)
                end_page_writeback(page);
        return ret;
}
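The xchg()-based linking in ext4_end_bio() above is a lock-free list push: each completing bio atomically becomes the new head of io_end->bio and points at the previous head, so racing completions never lose a node. A stand-alone sketch of the same pattern, with the GCC __atomic builtin standing in for the kernel's xchg():

#include <stdio.h>

struct node {
        struct node *next;
        int id;
};

static struct node *head;       /* plays the role of io_end->bio */

static void push(struct node *n)
{
        /* atomically install n as the new head; n->next takes the old head */
        n->next = __atomic_exchange_n(&head, n, __ATOMIC_ACQ_REL);
}

int main(void)
{
        struct node a = { .id = 1 }, b = { .id = 2 };

        push(&a);
        push(&b);
        for (struct node *n = head; n; n = n->next)
                printf("node %d\n", n->id);     /* prints 2, then 1 */
        return 0;
}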
2021
fs/ext4/resize.c
Normal file
File diff suppressed because it is too large
5791
fs/ext4/super.c
Normal file
File diff suppressed because it is too large
52
fs/ext4/symlink.c
Normal file
@ -0,0 +1,52 @@
/*
 * linux/fs/ext4/symlink.c
 *
 * Only fast symlinks left here - the rest is done by generic code. AV, 1999
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * from
 *
 * linux/fs/minix/symlink.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * ext4 symlink handling code
 */

#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/namei.h>
#include "ext4.h"
#include "xattr.h"

static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
        struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
        nd_set_link(nd, (char *) ei->i_data);
        return NULL;
}

const struct inode_operations ext4_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
        .setattr        = ext4_setattr,
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
        .listxattr      = ext4_listxattr,
        .removexattr    = generic_removexattr,
};

const struct inode_operations ext4_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = ext4_follow_link,
        .setattr        = ext4_setattr,
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
        .listxattr      = ext4_listxattr,
        .removexattr    = generic_removexattr,
};
43
fs/ext4/truncate.h
Normal file
@ -0,0 +1,43 @@
/*
 * linux/fs/ext4/truncate.h
 *
 * Common inline functions needed for truncate support
 */

/*
 * Truncate blocks that were not used by write. We have to truncate the
 * pagecache as well so that corresponding buffers get properly unmapped.
 */
static inline void ext4_truncate_failed_write(struct inode *inode)
{
        truncate_inode_pages(inode->i_mapping, inode->i_size);
        ext4_truncate(inode);
}

/*
 * Work out how many blocks we need to proceed with the next chunk of a
 * truncate transaction.
 */
static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
{
        ext4_lblk_t needed;

        needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);

        /* Give ourselves just enough room to cope with inodes in which
         * i_blocks is corrupt: we've seen disk corruptions in the past
         * which resulted in random data in an inode which looked enough
         * like a regular file for ext4 to try to delete it. Things
         * will go a bit crazy if that happens, but at least we should
         * try not to panic the whole kernel. */
        if (needed < 2)
                needed = 2;

        /* But we need to bound the transaction so we don't overflow the
         * journal. */
        if (needed > EXT4_MAX_TRANS_DATA)
                needed = EXT4_MAX_TRANS_DATA;

        return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
}
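A rough worked example of the credit arithmetic above, as a stand-alone sketch: assume a 4 KiB-block filesystem (s_blocksize_bits = 12) and the usual EXT4_MAX_TRANS_DATA of 64 from ext4_jbd2.h; EXT4_DATA_TRANS_BLOCKS() depends on the superblock, so a placeholder stands in for it here:

#include <stdio.h>

#define MAX_TRANS_DATA          64UL
#define DATA_TRANS_BLOCKS       62UL    /* placeholder for EXT4_DATA_TRANS_BLOCKS(sb) */

static unsigned long blocks_for_truncate(unsigned long long i_blocks,
                                         unsigned int blocksize_bits)
{
        /* i_blocks counts 512-byte sectors; convert to fs blocks */
        unsigned long needed = i_blocks >> (blocksize_bits - 9);

        if (needed < 2)
                needed = 2;
        if (needed > MAX_TRANS_DATA)
                needed = MAX_TRANS_DATA;
        return DATA_TRANS_BLOCKS + needed;
}

int main(void)
{
        /* a 1 MiB file: 2048 sectors -> needed = 256, clamped to 64 */
        printf("%lu\n", blocks_for_truncate(2048, 12));
        return 0;
}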
1729
fs/ext4/xattr.c
Normal file
File diff suppressed because it is too large
136
fs/ext4/xattr.h
Normal file
@ -0,0 +1,136 @@
/*
  File: fs/ext4/xattr.h

  On-disk format of extended attributes for the ext4 filesystem.

  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
*/

#include <linux/xattr.h>

/* Magic value in attribute blocks */
#define EXT4_XATTR_MAGIC                0xEA020000

/* Maximum number of references to one attribute block */
#define EXT4_XATTR_REFCOUNT_MAX         1024

/* Name indexes */
#define EXT4_XATTR_INDEX_USER                   1
#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS       2
#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT      3
#define EXT4_XATTR_INDEX_TRUSTED                4
#define EXT4_XATTR_INDEX_LUSTRE                 5
#define EXT4_XATTR_INDEX_SECURITY               6
#define EXT4_XATTR_INDEX_SYSTEM                 7
#define EXT4_XATTR_INDEX_RICHACL                8

struct ext4_xattr_header {
        __le32  h_magic;        /* magic number for identification */
        __le32  h_refcount;     /* reference count */
        __le32  h_blocks;       /* number of disk blocks used */
        __le32  h_hash;         /* hash value of all attributes */
        __le32  h_checksum;     /* crc32c(uuid+id+xattrblock) */
                                /* id = inum if refcount=1, blknum otherwise */
        __u32   h_reserved[3];  /* zero right now */
};

struct ext4_xattr_ibody_header {
        __le32  h_magic;        /* magic number for identification */
};

struct ext4_xattr_entry {
        __u8    e_name_len;     /* length of name */
        __u8    e_name_index;   /* attribute name index */
        __le16  e_value_offs;   /* offset in disk block of value */
        __le32  e_value_block;  /* disk block attribute is stored on (n/i) */
        __le32  e_value_size;   /* size of attribute value */
        __le32  e_hash;         /* hash value of name and value */
        char    e_name[0];      /* attribute name */
};

#define EXT4_XATTR_PAD_BITS     2
#define EXT4_XATTR_PAD          (1<<EXT4_XATTR_PAD_BITS)
#define EXT4_XATTR_ROUND        (EXT4_XATTR_PAD-1)
#define EXT4_XATTR_LEN(name_len) \
        (((name_len) + EXT4_XATTR_ROUND + \
        sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
#define EXT4_XATTR_NEXT(entry) \
        ((struct ext4_xattr_entry *)( \
         (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
#define EXT4_XATTR_SIZE(size) \
        (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
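The fixed part of struct ext4_xattr_entry above is 16 bytes, and EXT4_XATTR_LEN() pads header-plus-name up to the 4-byte EXT4_XATTR_PAD boundary. A stand-alone sketch of the rounding (the 16-byte constant is read off the struct definition above):

#include <stdio.h>
#include <stddef.h>

#define PAD             4
#define ROUND           (PAD - 1)
#define ENTRY_FIXED     16      /* sizeof(struct ext4_xattr_entry) sans name */

static size_t xattr_len(size_t name_len)
{
        return (name_len + ROUND + ENTRY_FIXED) & ~(size_t)ROUND;
}

int main(void)
{
        /* a 7-byte name such as "selinux": 16 + 7 = 23, padded to 24 */
        printf("%zu\n", xattr_len(7));
        return 0;
}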
#define IHDR(inode, raw_inode) \
        ((struct ext4_xattr_ibody_header *) \
                ((void *)raw_inode + \
                EXT4_GOOD_OLD_INODE_SIZE + \
                EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))

#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)

#define EXT4_ZERO_XATTR_VALUE ((void *)-1)

struct ext4_xattr_info {
        int name_index;
        const char *name;
        const void *value;
        size_t value_len;
};

struct ext4_xattr_search {
        struct ext4_xattr_entry *first;
        void *base;
        void *end;
        struct ext4_xattr_entry *here;
        int not_found;
};

struct ext4_xattr_ibody_find {
        struct ext4_xattr_search s;
        struct ext4_iloc iloc;
};

extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;

extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);

extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);

extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
extern void ext4_xattr_put_super(struct super_block *);

extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
                                      struct ext4_inode *raw_inode, handle_t *handle);

extern const struct xattr_handler *ext4_xattr_handlers[];

extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
                                 struct ext4_xattr_ibody_find *is);
extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
                                const char *name,
                                void *buffer, size_t buffer_size);
extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
                                       struct ext4_xattr_info *i,
                                       struct ext4_xattr_ibody_find *is);

extern struct mb_cache *ext4_xattr_create_cache(char *name);
extern void ext4_xattr_destroy_cache(struct mb_cache *);

#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
                              struct inode *dir, const struct qstr *qstr);
#else
static inline int ext4_init_security(handle_t *handle, struct inode *inode,
                                     struct inode *dir, const struct qstr *qstr)
{
        return 0;
}
#endif
82
fs/ext4/xattr_security.c
Normal file
@ -0,0 +1,82 @@
/*
 * linux/fs/ext4/xattr_security.c
 * Handler for storing security labels as extended attributes.
 */

#include <linux/string.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"

static size_t
ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
                         const char *name, size_t name_len, int type)
{
        const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
        const size_t total_len = prefix_len + name_len + 1;

        if (list && total_len <= list_size) {
                memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
                memcpy(list+prefix_len, name, name_len);
                list[prefix_len + name_len] = '\0';
        }
        return total_len;
}

static int
ext4_xattr_security_get(struct dentry *dentry, const char *name,
                        void *buffer, size_t size, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
                              name, buffer, size);
}

static int
ext4_xattr_security_set(struct dentry *dentry, const char *name,
                        const void *value, size_t size, int flags, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
                              name, value, size, flags);
}

static int
ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
                void *fs_info)
{
        const struct xattr *xattr;
        handle_t *handle = fs_info;
        int err = 0;

        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                err = ext4_xattr_set_handle(handle, inode,
                                            EXT4_XATTR_INDEX_SECURITY,
                                            xattr->name, xattr->value,
                                            xattr->value_len, 0);
                if (err < 0)
                        break;
        }
        return err;
}

int
ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
                   const struct qstr *qstr)
{
        return security_inode_init_security(inode, dir, qstr,
                                            &ext4_initxattrs, handle);
}

const struct xattr_handler ext4_xattr_security_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
        .list   = ext4_xattr_security_list,
        .get    = ext4_xattr_security_get,
        .set    = ext4_xattr_security_set,
};
58
fs/ext4/xattr_trusted.c
Normal file
@ -0,0 +1,58 @@
/*
 * linux/fs/ext4/xattr_trusted.c
 * Handler for trusted extended attributes.
 *
 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

#include <linux/string.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"

static size_t
ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
                        const char *name, size_t name_len, int type)
{
        const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;

        if (!capable(CAP_SYS_ADMIN))
                return 0;

        if (list && total_len <= list_size) {
                memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
                memcpy(list+prefix_len, name, name_len);
                list[prefix_len + name_len] = '\0';
        }
        return total_len;
}

static int
ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
                       size_t size, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
                              name, buffer, size);
}

static int
ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
                       const void *value, size_t size, int flags, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
                              name, value, size, flags);
}

const struct xattr_handler ext4_xattr_trusted_handler = {
        .prefix = XATTR_TRUSTED_PREFIX,
        .list   = ext4_xattr_trusted_list,
        .get    = ext4_xattr_trusted_get,
        .set    = ext4_xattr_trusted_set,
};
61
fs/ext4/xattr_user.c
Normal file
@ -0,0 +1,61 @@
/*
 * linux/fs/ext4/xattr_user.c
 * Handler for extended user attributes.
 *
 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

#include <linux/string.h>
#include <linux/fs.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"

static size_t
ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
                     const char *name, size_t name_len, int type)
{
        const size_t prefix_len = XATTR_USER_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;

        if (!test_opt(dentry->d_sb, XATTR_USER))
                return 0;

        if (list && total_len <= list_size) {
                memcpy(list, XATTR_USER_PREFIX, prefix_len);
                memcpy(list+prefix_len, name, name_len);
                list[prefix_len + name_len] = '\0';
        }
        return total_len;
}

static int
ext4_xattr_user_get(struct dentry *dentry, const char *name,
                    void *buffer, size_t size, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
        return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
                              name, buffer, size);
}

static int
ext4_xattr_user_set(struct dentry *dentry, const char *name,
                    const void *value, size_t size, int flags, int type)
{
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
        return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
                              name, value, size, flags);
}

const struct xattr_handler ext4_xattr_user_handler = {
        .prefix = XATTR_USER_PREFIX,
        .list   = ext4_xattr_user_list,
        .get    = ext4_xattr_user_get,
        .set    = ext4_xattr_user_set,
};
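These three handlers are what the generic setxattr(2)/getxattr(2) path dispatches to, keyed on the name prefix; "user." names land in ext4_xattr_user_handler and are stored under EXT4_XATTR_INDEX_USER. A small userspace sketch using the standard syscall wrappers (the path and attribute name are illustrative, and the filesystem must be mounted with user_xattr):

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
        const char *path = "/mnt/ext4/file";       /* illustrative path */
        char buf[64];
        ssize_t n;

        if (setxattr(path, "user.comment", "hello", 5, 0) != 0) {
                perror("setxattr");
                return 1;
        }
        n = getxattr(path, "user.comment", buf, sizeof(buf) - 1);
        if (n < 0) {
                perror("getxattr");
                return 1;
        }
        buf[n] = '\0';
        printf("user.comment = %s\n", buf);
        return 0;
}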