Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

90
fs/btrfs/Kconfig Normal file
View file

@ -0,0 +1,90 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
select CRYPTO
select CRYPTO_CRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
select LZO_DECOMPRESS
select RAID6_PQ
select XOR_BLOCKS
help
Btrfs is a general purpose copy-on-write filesystem with extents,
writable snapshotting, support for multiple devices and many more
features focused on fault tolerance, repair and easy administration.
The filesystem disk format is no longer unstable, and it's not
expected to change unless there are strong reasons to do so. If there
is a format change, file systems with a unchanged format will
continue to be mountable and usable by newer kernels.
For more information, please see the web pages at
http://btrfs.wiki.kernel.org.
To compile this file system support as a module, choose M here. The
module will be called btrfs.
If unsure, say N.
config BTRFS_FS_POSIX_ACL
bool "Btrfs POSIX Access Control Lists"
depends on BTRFS_FS
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
config BTRFS_FS_CHECK_INTEGRITY
bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
depends on BTRFS_FS
help
Adds code that examines all block write requests (including
writes of the super block). The goal is to verify that the
state of the filesystem on disk is always consistent, i.e.,
after a power-loss or kernel panic event the filesystem is
in a consistent state.
If the integrity check tool is included and activated in
the mount options, plenty of kernel memory is used, and
plenty of additional CPU cycles are spent. Enabling this
functionality is not intended for normal use.
In most cases, unless you are a btrfs developer who needs
to verify the integrity of (super)-block write requests
during the run of a regression test, say N
config BTRFS_FS_RUN_SANITY_TESTS
bool "Btrfs will run sanity tests upon loading"
depends on BTRFS_FS
help
This will run some basic sanity tests on the free space cache
code to make sure it is acting as it should. These are mostly
regression tests and are only really interesting to btrfs
developers.
If unsure, say N.
config BTRFS_DEBUG
bool "Btrfs debugging support"
depends on BTRFS_FS
help
Enable run-time debugging support for the btrfs filesystem. This may
enable additional and expensive checks with negative impact on
performance, or export extra information via sysfs.
If unsure, say N.
config BTRFS_ASSERT
bool "Btrfs assert support"
depends on BTRFS_FS
help
Enable run-time assertion checking. This will result in panics if
any of the assertions trip. This is meant for btrfs developers only.
If unsure, say N.

19
fs/btrfs/Makefile Normal file
View file

@ -0,0 +1,19 @@
obj-$(CONFIG_BTRFS_FS) := btrfs.o
btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o

166
fs/btrfs/acl.c Normal file
View file

@ -0,0 +1,166 @@
/*
* Copyright (C) 2007 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/posix_acl.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "xattr.h"
struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
{
int size;
const char *name;
char *value = NULL;
struct posix_acl *acl;
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
break;
case ACL_TYPE_DEFAULT:
name = POSIX_ACL_XATTR_DEFAULT;
break;
default:
BUG();
}
size = __btrfs_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_NOFS);
if (!value)
return ERR_PTR(-ENOMEM);
size = __btrfs_getxattr(inode, name, value, size);
}
if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
} else if (size == -ENOENT || size == -ENODATA || size == 0) {
/* FIXME, who returns -ENOENT? I think nobody */
acl = NULL;
} else {
acl = ERR_PTR(-EIO);
}
kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl;
}
/*
* Needs to be called with fs_mutex held
*/
static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
char *value = NULL;
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
return ret;
if (ret == 0)
acl = NULL;
}
ret = 0;
break;
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
return acl ? -EINVAL : 0;
name = POSIX_ACL_XATTR_DEFAULT;
break;
default:
return -EINVAL;
}
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
value = kmalloc(size, GFP_NOFS);
if (!value) {
ret = -ENOMEM;
goto out;
}
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (ret < 0)
goto out;
}
ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
out:
kfree(value);
if (!ret)
set_cached_acl(inode, type, acl);
return ret;
}
int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
return __btrfs_set_acl(NULL, inode, acl, type);
}
/*
* btrfs_init_acl is already generally called under fs_mutex, so the locking
* stuff has been fixed to work with that. If the locking stuff changes, we
* need to re-evaluate the acl locking stuff.
*/
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir)
{
struct posix_acl *default_acl, *acl;
int ret = 0;
/* this happens with subvols */
if (!dir)
return 0;
ret = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (ret)
return ret;
if (default_acl) {
ret = __btrfs_set_acl(trans, inode, default_acl,
ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}
if (acl) {
if (!ret)
ret = __btrfs_set_acl(trans, inode, acl,
ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
if (!default_acl && !acl)
cache_no_acl(inode);
return ret;
}

365
fs/btrfs/async-thread.c Normal file
View file

@ -0,0 +1,365 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2014 Fujitsu. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"
#include "ctree.h"
#define WORK_DONE_BIT 0
#define WORK_ORDER_DONE_BIT 1
#define WORK_HIGH_PRIO_BIT 2
#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)
struct __btrfs_workqueue {
struct workqueue_struct *normal_wq;
/* List head pointing to ordered work list */
struct list_head ordered_list;
/* Spinlock for ordered_list */
spinlock_t list_lock;
/* Thresholding related variants */
atomic_t pending;
int max_active;
int current_max;
int thresh;
unsigned int count;
spinlock_t thres_lock;
};
struct btrfs_workqueue {
struct __btrfs_workqueue *normal;
struct __btrfs_workqueue *high;
};
static void normal_work_helper(struct btrfs_work *work);
#define BTRFS_WORK_HELPER(name) \
void btrfs_##name(struct work_struct *arg) \
{ \
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
normal_work); \
normal_work_helper(work); \
}
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
BTRFS_WORK_HELPER(cache_helper);
BTRFS_WORK_HELPER(submit_helper);
BTRFS_WORK_HELPER(fixup_helper);
BTRFS_WORK_HELPER(endio_helper);
BTRFS_WORK_HELPER(endio_meta_helper);
BTRFS_WORK_HELPER(endio_meta_write_helper);
BTRFS_WORK_HELPER(endio_raid56_helper);
BTRFS_WORK_HELPER(endio_repair_helper);
BTRFS_WORK_HELPER(rmw_helper);
BTRFS_WORK_HELPER(endio_write_helper);
BTRFS_WORK_HELPER(freespace_write_helper);
BTRFS_WORK_HELPER(delayed_meta_helper);
BTRFS_WORK_HELPER(readahead_helper);
BTRFS_WORK_HELPER(qgroup_rescan_helper);
BTRFS_WORK_HELPER(extent_refs_helper);
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
if (!ret)
return NULL;
ret->max_active = max_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
if (thresh < DFT_THRESHOLD) {
ret->current_max = max_active;
ret->thresh = NO_THRESHOLD;
} else {
ret->current_max = 1;
ret->thresh = thresh;
}
if (flags & WQ_HIGHPRI)
ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
ret->max_active,
"btrfs", name);
else
ret->normal_wq = alloc_workqueue("%s-%s", flags,
ret->max_active, "btrfs",
name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
}
INIT_LIST_HEAD(&ret->ordered_list);
spin_lock_init(&ret->list_lock);
spin_lock_init(&ret->thres_lock);
trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
return ret;
}
static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
int flags,
int max_active,
int thresh)
{
struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
if (!ret)
return NULL;
ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
max_active, thresh);
if (!ret->normal) {
kfree(ret);
return NULL;
}
if (flags & WQ_HIGHPRI) {
ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
thresh);
if (!ret->high) {
__btrfs_destroy_workqueue(ret->normal);
kfree(ret);
return NULL;
}
}
return ret;
}
/*
* Hook for threshold which will be called in btrfs_queue_work.
* This hook WILL be called in IRQ handler context,
* so workqueue_set_max_active MUST NOT be called in this hook
*/
static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
{
if (wq->thresh == NO_THRESHOLD)
return;
atomic_inc(&wq->pending);
}
/*
* Hook for threshold which will be called before executing the work,
* This hook is called in kthread content.
* So workqueue_set_max_active is called here.
*/
static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
{
int new_max_active;
long pending;
int need_change = 0;
if (wq->thresh == NO_THRESHOLD)
return;
atomic_dec(&wq->pending);
spin_lock(&wq->thres_lock);
/*
* Use wq->count to limit the calling frequency of
* workqueue_set_max_active.
*/
wq->count++;
wq->count %= (wq->thresh / 4);
if (!wq->count)
goto out;
new_max_active = wq->current_max;
/*
* pending may be changed later, but it's OK since we really
* don't need it so accurate to calculate new_max_active.
*/
pending = atomic_read(&wq->pending);
if (pending > wq->thresh)
new_max_active++;
if (pending < wq->thresh / 2)
new_max_active--;
new_max_active = clamp_val(new_max_active, 1, wq->max_active);
if (new_max_active != wq->current_max) {
need_change = 1;
wq->current_max = new_max_active;
}
out:
spin_unlock(&wq->thres_lock);
if (need_change) {
workqueue_set_max_active(wq->normal_wq, wq->current_max);
}
}
static void run_ordered_work(struct __btrfs_workqueue *wq)
{
struct list_head *list = &wq->ordered_list;
struct btrfs_work *work;
spinlock_t *lock = &wq->list_lock;
unsigned long flags;
while (1) {
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
work = list_entry(list->next, struct btrfs_work,
ordered_list);
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
/*
* we are going to call the ordered done function, but
* we leave the work item on the list as a barrier so
* that later work items that are done don't have their
* functions called before this one returns
*/
if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
break;
trace_btrfs_ordered_sched(work);
spin_unlock_irqrestore(lock, flags);
work->ordered_func(work);
/* now take the lock again and drop our item from the list */
spin_lock_irqsave(lock, flags);
list_del(&work->ordered_list);
spin_unlock_irqrestore(lock, flags);
/*
* we don't want to call the ordered free functions
* with the lock held though
*/
work->ordered_free(work);
trace_btrfs_all_work_done(work);
}
spin_unlock_irqrestore(lock, flags);
}
static void normal_work_helper(struct btrfs_work *work)
{
struct __btrfs_workqueue *wq;
int need_order = 0;
/*
* We should not touch things inside work in the following cases:
* 1) after work->func() if it has no ordered_free
* Since the struct is freed in work->func().
* 2) after setting WORK_DONE_BIT
* The work may be freed in other threads almost instantly.
* So we save the needed things here.
*/
if (work->ordered_func)
need_order = 1;
wq = work->wq;
trace_btrfs_work_sched(work);
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
set_bit(WORK_DONE_BIT, &work->flags);
run_ordered_work(wq);
}
if (!need_order)
trace_btrfs_all_work_done(work);
}
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free)
{
work->func = func;
work->ordered_func = ordered_func;
work->ordered_free = ordered_free;
INIT_WORK(&work->normal_work, uniq_func);
INIT_LIST_HEAD(&work->ordered_list);
work->flags = 0;
}
static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
struct btrfs_work *work)
{
unsigned long flags;
work->wq = wq;
thresh_queue_hook(wq);
if (work->ordered_func) {
spin_lock_irqsave(&wq->list_lock, flags);
list_add_tail(&work->ordered_list, &wq->ordered_list);
spin_unlock_irqrestore(&wq->list_lock, flags);
}
queue_work(wq->normal_wq, &work->normal_work);
trace_btrfs_work_queued(work);
}
void btrfs_queue_work(struct btrfs_workqueue *wq,
struct btrfs_work *work)
{
struct __btrfs_workqueue *dest_wq;
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
dest_wq = wq->high;
else
dest_wq = wq->normal;
__btrfs_queue_work(dest_wq, work);
}
static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
{
destroy_workqueue(wq->normal_wq);
trace_btrfs_workqueue_destroy(wq);
kfree(wq);
}
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
{
if (!wq)
return;
if (wq->high)
__btrfs_destroy_workqueue(wq->high);
__btrfs_destroy_workqueue(wq->normal);
kfree(wq);
}
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
{
if (!wq)
return;
wq->normal->max_active = max;
if (wq->high)
wq->high->max_active = max;
}
void btrfs_set_work_high_priority(struct btrfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}

81
fs/btrfs/async-thread.h Normal file
View file

@ -0,0 +1,81 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2014 Fujitsu. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_ASYNC_THREAD_
#define __BTRFS_ASYNC_THREAD_
#include <linux/workqueue.h>
struct btrfs_workqueue;
/* Internal use only */
struct __btrfs_workqueue;
struct btrfs_work;
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
typedef void (*btrfs_work_func_t)(struct work_struct *arg);
struct btrfs_work {
btrfs_func_t func;
btrfs_func_t ordered_func;
btrfs_func_t ordered_free;
/* Don't touch things below */
struct work_struct normal_work;
struct list_head ordered_list;
struct __btrfs_workqueue *wq;
unsigned long flags;
};
#define BTRFS_WORK_HELPER_PROTO(name) \
void btrfs_##name(struct work_struct *arg)
BTRFS_WORK_HELPER_PROTO(worker_helper);
BTRFS_WORK_HELPER_PROTO(delalloc_helper);
BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
BTRFS_WORK_HELPER_PROTO(cache_helper);
BTRFS_WORK_HELPER_PROTO(submit_helper);
BTRFS_WORK_HELPER_PROTO(fixup_helper);
BTRFS_WORK_HELPER_PROTO(endio_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
BTRFS_WORK_HELPER_PROTO(rmw_helper);
BTRFS_WORK_HELPER_PROTO(endio_write_helper);
BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
BTRFS_WORK_HELPER_PROTO(readahead_helper);
BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
BTRFS_WORK_HELPER_PROTO(scrub_helper);
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
int flags,
int max_active,
int thresh);
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
struct btrfs_work *work);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
#endif

1970
fs/btrfs/backref.c Normal file

File diff suppressed because it is too large Load diff

80
fs/btrfs/backref.h Normal file
View file

@ -0,0 +1,80 @@
/*
* Copyright (C) 2011 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_BACKREF__
#define __BTRFS_BACKREF__
#include <linux/btrfs.h>
#include "ulist.h"
#include "extent_io.h"
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
struct btrfs_root *fs_root;
struct btrfs_data_container *fspath;
};
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
void *ctx);
int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path);
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
struct btrfs_path *path, struct btrfs_key *found_key,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
struct btrfs_key *key, struct btrfs_extent_item *ei,
u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
u64 extent_offset, int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);
void free_ipath(struct inode_fs_paths *ipath);
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
int btrfs_check_shared(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 root_objectid,
u64 inum, u64 bytenr);
int __init btrfs_prelim_ref_init(void);
void btrfs_prelim_ref_exit(void);
#endif

317
fs/btrfs/btrfs_inode.h Normal file
View file

@ -0,0 +1,317 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_I__
#define __BTRFS_I__
#include <linux/hash.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
#include "delayed-inode.h"
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
/*
* The following 3 bits are meant only for the btree inode.
* When any of them is set, it means an error happened while writing an
* extent buffer belonging to:
* 1) a non-log btree
* 2) a log btree and first log sub-transaction
* 3) a log btree and second log sub-transaction
*/
#define BTRFS_INODE_BTREE_ERR 12
#define BTRFS_INODE_BTREE_LOG1_ERR 13
#define BTRFS_INODE_BTREE_LOG2_ERR 14
/* in memory btrfs inode */
struct btrfs_inode {
/* which subvolume this inode belongs to */
struct btrfs_root *root;
/* key used to find this inode on disk. This is used by the code
* to read in roots of subvolumes
*/
struct btrfs_key location;
/* Lock for counters */
spinlock_t lock;
/* the extent_tree has caches of all the extent mappings to disk */
struct extent_map_tree extent_tree;
/* the io_tree does range state (DIRTY, LOCKED etc) */
struct extent_io_tree io_tree;
/* special utility tree used to record which mirrors have already been
* tried when checksums fail for a given block
*/
struct extent_io_tree io_failure_tree;
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
/* held while doing delalloc reservations */
struct mutex delalloc_mutex;
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
/* list of all the delalloc inodes in the FS. There are times we need
* to write all the delalloc pages to disk, and this list is used
* to walk them all.
*/
struct list_head delalloc_inodes;
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
unsigned long runtime_flags;
/* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
u64 generation;
/*
* transid of the trans_handle that last modified this inode
*/
u64 last_trans;
/*
* transid that last logged this inode
*/
u64 logged_trans;
/*
* log transid when this inode was last modified
*/
int last_sub_trans;
/* a local copy of root's last_log_commit */
int last_log_commit;
/* total number of bytes pending delalloc, used by stat to calc the
* real block usage of the file
*/
u64 delalloc_bytes;
/*
* total number of bytes pending defrag, used by stat to check whether
* it needs COW.
*/
u64 defrag_bytes;
/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
* because not all the blocks are written yet.
*/
u64 disk_i_size;
/*
* if this is a directory then index_cnt is the counter for the index
* number for new files that are created
*/
u64 index_cnt;
/* Cache the directory index number to speed the dir/file remove */
u64 dir_index;
/* the fsync log has some corner cases that mean we have to check
* directories to see if any unlinks have been done before
* the directory was logged. See tree-log.c for all the
* details
*/
u64 last_unlink_trans;
/*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
*/
u64 csum_bytes;
/* flags field from the on disk inode */
u32 flags;
/*
* Counters to keep track of the number of extent item's we may use due
* to delalloc and such. outstanding_extents is the number of extent
* items we think we'll end up using, and reserved_extents is the number
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
unsigned reserved_extents;
/*
* always compress this one file
*/
unsigned force_compress;
struct btrfs_delayed_node *delayed_node;
struct inode vfs_inode;
};
extern unsigned char btrfs_filetype_table[];
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
{
return container_of(inode, struct btrfs_inode, vfs_inode);
}
static inline unsigned long btrfs_inode_hash(u64 objectid,
const struct btrfs_root *root)
{
u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
#if BITS_PER_LONG == 32
h = (h >> 32) ^ (h & 0xffffffff);
#endif
return (unsigned long)h;
}
static inline void btrfs_insert_inode_hash(struct inode *inode)
{
unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
__insert_inode_hash(inode, h);
}
static inline u64 btrfs_ino(struct inode *inode)
{
u64 ino = BTRFS_I(inode)->location.objectid;
/*
* !ino: btree_inode
* type == BTRFS_ROOT_ITEM_KEY: subvol dir
*/
if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
ino = inode->i_ino;
return ino;
}
static inline void btrfs_i_size_write(struct inode *inode, u64 size)
{
i_size_write(inode, size);
BTRFS_I(inode)->disk_i_size = size;
}
static inline bool btrfs_is_free_space_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
if (root == root->fs_info->tree_root &&
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
if (BTRFS_I(inode)->logged_trans == generation &&
BTRFS_I(inode)->last_sub_trans <=
BTRFS_I(inode)->last_log_commit &&
BTRFS_I(inode)->last_sub_trans <=
BTRFS_I(inode)->root->last_log_commit) {
/*
* After a ranged fsync we might have left some extent maps
* (that fall outside the fsync's range). So return false
* here if the list isn't empty, to make sure btrfs_log_inode()
* will be called and process those extent maps.
*/
smp_mb();
if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
return 1;
}
return 0;
}
#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
struct btrfs_dio_private {
struct inode *inode;
unsigned long flags;
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
void *private;
/* number of bios pending for this dio */
atomic_t pending_bios;
/* IO errors */
int errors;
/* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
/*
* The original bio may be splited to several sub-bios, this is
* done during endio of sub-bios
*/
int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
};
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
smp_mb();
}
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb__before_atomic();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
#endif

3281
fs/btrfs/check-integrity.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,38 @@
/*
* Copyright (C) STRATO AG 2011. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#if !defined(__BTRFS_CHECK_INTEGRITY__)
#define __BTRFS_CHECK_INTEGRITY__
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int rw, struct buffer_head *bh);
void btrfsic_submit_bio(int rw, struct bio *bio);
int btrfsic_submit_bio_wait(int rw, struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
#define btrfsic_submit_bio_wait submit_bio_wait
#endif
int btrfsic_mount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices,
int including_extent_data, u32 print_mask);
void btrfsic_unmount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices);
#endif

1085
fs/btrfs/compression.c Normal file

File diff suppressed because it is too large Load diff

83
fs/btrfs/compression.h Normal file
View file

@ -0,0 +1,83 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio_vec *bvec, int vcnt,
unsigned long *pg_index,
unsigned long *pg_offset);
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
unsigned long pg_index,
unsigned long pg_offset);
struct btrfs_compress_op {
struct list_head *(*alloc_workspace)(void);
void (*free_workspace)(struct list_head *workspace);
int (*compress_pages)(struct list_head *workspace,
struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out);
int (*decompress_biovec)(struct list_head *workspace,
struct page **pages_in,
u64 disk_start,
struct bio_vec *bvec,
int vcnt,
size_t srclen);
int (*decompress)(struct list_head *workspace,
unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen);
};
extern struct btrfs_compress_op btrfs_zlib_compress;
extern struct btrfs_compress_op btrfs_lzo_compress;
#endif

5900
fs/btrfs/ctree.c Normal file

File diff suppressed because it is too large Load diff

4145
fs/btrfs/ctree.h Normal file

File diff suppressed because it is too large Load diff

1988
fs/btrfs/delayed-inode.c Normal file

File diff suppressed because it is too large Load diff

156
fs/btrfs/delayed-inode.h Normal file
View file

@ -0,0 +1,156 @@
/*
* Copyright (C) 2011 Fujitsu. All rights reserved.
* Written by Miao Xie <miaox@cn.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __DELAYED_TREE_OPERATION_H
#define __DELAYED_TREE_OPERATION_H
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include "ctree.h"
/* types of the delayed item */
#define BTRFS_DELAYED_INSERTION_ITEM 1
#define BTRFS_DELAYED_DELETION_ITEM 2
struct btrfs_delayed_root {
spinlock_t lock;
struct list_head node_list;
/*
* Used for delayed nodes which is waiting to be dealt with by the
* worker. If the delayed node is inserted into the work queue, we
* drop it from this list.
*/
struct list_head prepare_list;
atomic_t items; /* for delayed items */
atomic_t items_seq; /* for delayed items */
int nodes; /* for delayed nodes */
wait_queue_head_t wait;
};
#define BTRFS_DELAYED_NODE_IN_LIST 0
#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
#define BTRFS_DELAYED_NODE_DEL_IREF 2
struct btrfs_delayed_node {
u64 inode_id;
u64 bytes_reserved;
struct btrfs_root *root;
/* Used to add the node into the delayed root's node list. */
struct list_head n_list;
/*
* Used to add the node into the prepare list, the nodes in this list
* is waiting to be dealt with by the async worker.
*/
struct list_head p_list;
struct rb_root ins_root;
struct rb_root del_root;
struct mutex mutex;
struct btrfs_inode_item inode_item;
atomic_t refs;
u64 index_cnt;
unsigned long flags;
int count;
};
struct btrfs_delayed_item {
struct rb_node rb_node;
struct btrfs_key key;
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
u32 data_len;
char data[0];
};
static inline void btrfs_init_delayed_root(
struct btrfs_delayed_root *delayed_root)
{
atomic_set(&delayed_root->items, 0);
atomic_set(&delayed_root->items_seq, 0);
delayed_root->nodes = 0;
spin_lock_init(&delayed_root->lock);
init_waitqueue_head(&delayed_root->wait);
INIT_LIST_HEAD(&delayed_root->node_list);
INIT_LIST_HEAD(&delayed_root->prepare_list);
}
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name,
int name_len, struct inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *dir,
u64 index);
int btrfs_inode_delayed_dir_index_count(struct inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr);
void btrfs_balance_delayed_items(struct btrfs_root *root);
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct inode *inode);
/* Used for evicting the inode. */
void btrfs_remove_delayed_node(struct inode *inode);
void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_commit_inode_delayed_inode(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
int btrfs_delayed_delete_inode_ref(struct inode *inode);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
/* Used for clean the transaction */
void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
/* Used for readdir() */
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
struct list_head *del_list);
void btrfs_put_delayed_items(struct list_head *ins_list,
struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list);
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
/* for debugging */
void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
#endif

944
fs/btrfs/delayed-ref.c Normal file
View file

@ -0,0 +1,944 @@
/*
* Copyright (C) 2009 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* delayed back reference update tracking. For subvolume trees
* we queue up extent allocations and backref maintenance for
* delayed processing. This avoids deep call chains where we
* add extents in the middle of btrfs_search_slot, and it allows
* us to buffer up frequently modified backrefs in an rb tree instead
* of hammering updates on the extent allocation tree.
*/
/*
* compare two delayed tree backrefs with same bytenr and type
*/
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
struct btrfs_delayed_tree_ref *ref1, int type)
{
if (type == BTRFS_TREE_BLOCK_REF_KEY) {
if (ref1->root < ref2->root)
return -1;
if (ref1->root > ref2->root)
return 1;
} else {
if (ref1->parent < ref2->parent)
return -1;
if (ref1->parent > ref2->parent)
return 1;
}
return 0;
}
/*
* compare two delayed data backrefs with same bytenr and type
*/
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
struct btrfs_delayed_data_ref *ref1)
{
if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
if (ref1->root < ref2->root)
return -1;
if (ref1->root > ref2->root)
return 1;
if (ref1->objectid < ref2->objectid)
return -1;
if (ref1->objectid > ref2->objectid)
return 1;
if (ref1->offset < ref2->offset)
return -1;
if (ref1->offset > ref2->offset)
return 1;
} else {
if (ref1->parent < ref2->parent)
return -1;
if (ref1->parent > ref2->parent)
return 1;
}
return 0;
}
/*
* entries in the rb tree are ordered by the byte number of the extent,
* type of the delayed backrefs and content of delayed backrefs.
*/
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
struct btrfs_delayed_ref_node *ref1,
bool compare_seq)
{
if (ref1->bytenr < ref2->bytenr)
return -1;
if (ref1->bytenr > ref2->bytenr)
return 1;
if (ref1->is_head && ref2->is_head)
return 0;
if (ref2->is_head)
return -1;
if (ref1->is_head)
return 1;
if (ref1->type < ref2->type)
return -1;
if (ref1->type > ref2->type)
return 1;
if (ref1->no_quota > ref2->no_quota)
return 1;
if (ref1->no_quota < ref2->no_quota)
return -1;
/* merging of sequenced refs is not allowed */
if (compare_seq) {
if (ref1->seq < ref2->seq)
return -1;
if (ref1->seq > ref2->seq)
return 1;
}
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
btrfs_delayed_node_to_tree_ref(ref1),
ref1->type);
} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
btrfs_delayed_node_to_data_ref(ref1));
}
BUG();
return 0;
}
/*
* insert a new ref into the rbtree. This returns any existing refs
* for the same (bytenr,parent) tuple, or NULL if the new node was properly
* inserted.
*/
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
struct rb_node *node)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_node *entry;
struct btrfs_delayed_ref_node *ins;
int cmp;
ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
rb_node);
cmp = comp_entry(entry, ins, 1);
if (cmp < 0)
p = &(*p)->rb_left;
else if (cmp > 0)
p = &(*p)->rb_right;
else
return entry;
}
rb_link_node(node, parent_node, p);
rb_insert_color(node, root);
return NULL;
}
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
struct rb_node *node)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_head *entry;
struct btrfs_delayed_ref_head *ins;
u64 bytenr;
ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
bytenr = ins->node.bytenr;
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
href_node);
if (bytenr < entry->node.bytenr)
p = &(*p)->rb_left;
else if (bytenr > entry->node.bytenr)
p = &(*p)->rb_right;
else
return entry;
}
rb_link_node(node, parent_node, p);
rb_insert_color(node, root);
return NULL;
}
/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot.
* If return_bigger is given, the next bigger entry is returned if no exact
* match is found.
*/
static struct btrfs_delayed_ref_head *
find_ref_head(struct rb_root *root, u64 bytenr,
int return_bigger)
{
struct rb_node *n;
struct btrfs_delayed_ref_head *entry;
n = root->rb_node;
entry = NULL;
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
if (bytenr < entry->node.bytenr)
n = n->rb_left;
else if (bytenr > entry->node.bytenr)
n = n->rb_right;
else
return entry;
}
if (entry && return_bigger) {
if (bytenr > entry->node.bytenr) {
n = rb_next(&entry->href_node);
if (!n)
n = rb_first(root);
entry = rb_entry(n, struct btrfs_delayed_ref_head,
href_node);
return entry;
}
return entry;
}
return NULL;
}
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
assert_spin_locked(&delayed_refs->lock);
if (mutex_trylock(&head->mutex))
return 0;
atomic_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
spin_lock(&delayed_refs->lock);
if (!head->node.in_tree) {
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
return -EAGAIN;
}
btrfs_put_delayed_ref(&head->node);
return 0;
}
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref)
{
if (btrfs_delayed_ref_is_head(ref)) {
head = btrfs_delayed_node_to_head(ref);
rb_erase(&head->href_node, &delayed_refs->href_root);
} else {
assert_spin_locked(&head->lock);
rb_erase(&ref->rb_node, &head->ref_root);
}
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
if (trans->delayed_ref_updates)
trans->delayed_ref_updates--;
}
static int merge_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref, u64 seq)
{
struct rb_node *node;
int mod = 0;
int done = 0;
node = rb_next(&ref->rb_node);
while (!done && node) {
struct btrfs_delayed_ref_node *next;
next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
node = rb_next(node);
if (seq && next->seq >= seq)
break;
if (comp_entry(ref, next, 0))
continue;
if (ref->action == next->action) {
mod = next->ref_mod;
} else {
if (ref->ref_mod < next->ref_mod) {
struct btrfs_delayed_ref_node *tmp;
tmp = ref;
ref = next;
next = tmp;
done = 1;
}
mod = -next->ref_mod;
}
drop_delayed_ref(trans, delayed_refs, head, next);
ref->ref_mod += mod;
if (ref->ref_mod == 0) {
drop_delayed_ref(trans, delayed_refs, head, ref);
done = 1;
} else {
/*
* You can't have multiples of the same ref on a tree
* block.
*/
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
}
}
return done;
}
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
struct rb_node *node;
u64 seq = 0;
assert_spin_locked(&head->lock);
/*
* We don't have too much refs to merge in the case of delayed data
* refs.
*/
if (head->is_data)
return;
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
struct seq_list *elem;
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
seq = elem->seq;
}
spin_unlock(&fs_info->tree_mod_seq_lock);
node = rb_first(&head->ref_root);
while (node) {
struct btrfs_delayed_ref_node *ref;
ref = rb_entry(node, struct btrfs_delayed_ref_node,
rb_node);
/* We can't merge refs that are outside of our seq count */
if (seq && ref->seq >= seq)
break;
if (merge_ref(trans, delayed_refs, head, ref, seq))
node = rb_first(&head->ref_root);
else
node = rb_next(&ref->rb_node);
}
}
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
int ret = 0;
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
(u32)(seq >> 32), (u32)seq,
(u32)(elem->seq >> 32), (u32)elem->seq,
delayed_refs);
ret = 1;
}
}
spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_head *head;
u64 start;
bool loop = false;
delayed_refs = &trans->transaction->delayed_refs;
again:
start = delayed_refs->run_delayed_start;
head = find_ref_head(&delayed_refs->href_root, start, 1);
if (!head && !loop) {
delayed_refs->run_delayed_start = 0;
start = 0;
loop = true;
head = find_ref_head(&delayed_refs->href_root, start, 1);
if (!head)
return NULL;
} else if (!head && loop) {
return NULL;
}
while (head->processing) {
struct rb_node *node;
node = rb_next(&head->href_node);
if (!node) {
if (loop)
return NULL;
delayed_refs->run_delayed_start = 0;
start = 0;
loop = true;
goto again;
}
head = rb_entry(node, struct btrfs_delayed_ref_head,
href_node);
}
head->processing = 1;
WARN_ON(delayed_refs->num_heads_ready == 0);
delayed_refs->num_heads_ready--;
delayed_refs->run_delayed_start = head->node.bytenr +
head->node.num_bytes;
return head;
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same
* bytenr and parent
*
* This may free existing if the update cancels out whatever
* operation it was doing.
*/
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update)
{
if (update->action != existing->action) {
/*
* this is effectively undoing either an add or a
* drop. We decrement the ref_mod, and if it goes
* down to zero we just delete the entry without
* every changing the extent allocation tree.
*/
existing->ref_mod--;
if (existing->ref_mod == 0)
drop_delayed_ref(trans, delayed_refs, head, existing);
else
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
} else {
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
/*
* the action on the existing ref matches
* the action on the ref we're trying to add.
* Bump the ref_mod by one so the backref that
* is eventually added/removed has the correct
* reference count
*/
existing->ref_mod += update->ref_mod;
}
}
/*
* helper function to update the accounting in the head ref
* existing and update must have the same bytenr
*/
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update)
{
struct btrfs_delayed_ref_head *existing_ref;
struct btrfs_delayed_ref_head *ref;
existing_ref = btrfs_delayed_node_to_head(existing);
ref = btrfs_delayed_node_to_head(update);
BUG_ON(existing_ref->is_data != ref->is_data);
spin_lock(&existing_ref->lock);
if (ref->must_insert_reserved) {
/* if the extent was freed and then
* reallocated before the delayed ref
* entries were processed, we can end up
* with an existing head ref without
* the must_insert_reserved flag set.
* Set it again here
*/
existing_ref->must_insert_reserved = ref->must_insert_reserved;
/*
* update the num_bytes so we make sure the accounting
* is done correctly
*/
existing->num_bytes = update->num_bytes;
}
if (ref->extent_op) {
if (!existing_ref->extent_op) {
existing_ref->extent_op = ref->extent_op;
} else {
if (ref->extent_op->update_key) {
memcpy(&existing_ref->extent_op->key,
&ref->extent_op->key,
sizeof(ref->extent_op->key));
existing_ref->extent_op->update_key = 1;
}
if (ref->extent_op->update_flags) {
existing_ref->extent_op->flags_to_set |=
ref->extent_op->flags_to_set;
existing_ref->extent_op->update_flags = 1;
}
btrfs_free_delayed_extent_op(ref->extent_op);
}
}
/*
* update the reference mod on the head to reflect this new operation,
* only need the lock for this case cause we could be processing it
* currently, for refs we just added we know we're a-ok.
*/
existing->ref_mod += update->ref_mod;
spin_unlock(&existing_ref->lock);
}
/*
* helper function to actually insert a head node into the rbtree.
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, int action, int is_data)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
/*
* the head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
if (action == BTRFS_UPDATE_DELAYED_HEAD)
count_mod = 0;
else if (action == BTRFS_DROP_DELAYED_REF)
count_mod = -1;
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update
* the reserved accounting when the extent is finally added, or
* if a later modification deletes the delayed ref without ever
* inserting the extent into the extent allocation tree.
* ref->must_insert_reserved is the flag used to record
* that accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
*/
if (action == BTRFS_ADD_DELAYED_EXTENT)
must_insert_reserved = 1;
else
must_insert_reserved = 0;
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = count_mod;
ref->type = 0;
ref->action = 0;
ref->is_head = 1;
ref->in_tree = 1;
ref->seq = 0;
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
head_ref->ref_root = RB_ROOT;
head_ref->processing = 0;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
trace_add_delayed_ref_head(ref, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
update_existing_head_ref(&existing->node, ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
head_ref = existing;
} else {
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
return head_ref;
}
/*
* helper to insert a delayed tree ref into the rbtree.
*/
static noinline void
add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
else
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
full_ref->level = level;
trace_add_delayed_tree_ref(ref, full_ref, action);
spin_lock(&head_ref->lock);
existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
if (existing) {
update_existing_ref(trans, delayed_refs, head_ref, existing,
ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
} else {
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
spin_unlock(&head_ref->lock);
}
/*
* helper to insert a delayed data ref into the rbtree.
*/
static noinline void
add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
delayed_refs = &trans->transaction->delayed_refs;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_DATA_REF_KEY;
else
ref->type = BTRFS_EXTENT_DATA_REF_KEY;
full_ref->objectid = owner;
full_ref->offset = offset;
trace_add_delayed_data_ref(ref, full_ref, action);
spin_lock(&head_ref->lock);
existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
if (existing) {
update_existing_ref(trans, delayed_refs, head_ref, existing,
ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
} else {
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
spin_unlock(&head_ref->lock);
}
/*
* add a delayed tree ref. This does all of the accounting required
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
return -ENOMEM;
}
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
/*
* insert both the head node and the new ref without dropping
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
bytenr, num_bytes, action, 0);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
no_quota);
spin_unlock(&delayed_refs->lock);
return 0;
}
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
return -ENOMEM;
}
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
/*
* insert both the head node and the new ref without dropping
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
bytenr, num_bytes, action, 1);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, no_quota);
spin_unlock(&delayed_refs->lock);
return 0;
}
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref)
return -ENOMEM;
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
spin_unlock(&delayed_refs->lock);
return 0;
}
/*
* this does a simple search for the head node for a given extent.
* It must be called with the delayed ref spinlock held, and it returns
* the head node if any where found, or NULL if not.
*/
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
void btrfs_delayed_ref_exit(void)
{
if (btrfs_delayed_ref_head_cachep)
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
if (btrfs_delayed_tree_ref_cachep)
kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
if (btrfs_delayed_data_ref_cachep)
kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
if (btrfs_delayed_extent_op_cachep)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
sizeof(struct btrfs_delayed_ref_head), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
btrfs_delayed_tree_ref_cachep = kmem_cache_create(
"btrfs_delayed_tree_ref",
sizeof(struct btrfs_delayed_tree_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
btrfs_delayed_data_ref_cachep = kmem_cache_create(
"btrfs_delayed_data_ref",
sizeof(struct btrfs_delayed_data_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
btrfs_delayed_extent_op_cachep = kmem_cache_create(
"btrfs_delayed_extent_op",
sizeof(struct btrfs_delayed_extent_op), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
return 0;
fail:
btrfs_delayed_ref_exit();
return -ENOMEM;
}

266
fs/btrfs/delayed-ref.h Normal file
View file

@ -0,0 +1,266 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __DELAYED_REF__
#define __DELAYED_REF__
/* these are the possible values of struct btrfs_delayed_ref_node->action */
#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
struct btrfs_delayed_ref_node {
struct rb_node rb_node;
/* the starting bytenr of the extent */
u64 bytenr;
/* the size of the extent */
u64 num_bytes;
/* seq number to keep track of insertion order */
u64 seq;
/* ref count on this data structure */
atomic_t refs;
/*
* how many refs is this entry adding or deleting. For
* head refs, this may be a negative number because it is keeping
* track of the total mods done to the reference count.
* For individual refs, this will always be a positive number
*
* It may be more than one, since it is possible for a single
* parent to have more than one ref on an extent
*/
int ref_mod;
unsigned int action:8;
unsigned int type:8;
unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
};
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
u64 flags_to_set;
int level;
unsigned int update_key:1;
unsigned int update_flags:1;
unsigned int is_data:1;
};
/*
* the head refs are used to hold a lock on a given extent, which allows us
* to make sure that only one process is running the delayed refs
* at a time for a single extent. They also store the sum of all the
* reference count modifications we've queued up.
*/
struct btrfs_delayed_ref_head {
struct btrfs_delayed_ref_node node;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
*/
struct mutex mutex;
spinlock_t lock;
struct rb_root ref_root;
struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
/*
* when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree
* until the delayed ref is processed. must_insert_reserved is
* used to flag a delayed ref so the accounting can be updated
* when a full insert is done.
*
* It is possible the extent will be freed before it is ever
* inserted into the extent allocation tree. In this case
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
unsigned int processing:1;
};
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
u64 root;
u64 parent;
int level;
};
struct btrfs_delayed_data_ref {
struct btrfs_delayed_ref_node node;
u64 root;
u64 parent;
u64 objectid;
u64 offset;
};
struct btrfs_delayed_ref_root {
/* head ref rbtree */
struct rb_root href_root;
/* this spin lock protects the rbtree and the entries inside */
spinlock_t lock;
/* how many delayed ref updates we've queued, used by the
* throttling code
*/
atomic_t num_entries;
/* total number of head nodes in tree */
unsigned long num_heads;
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;
/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
* to be run right away
*/
int flushing;
u64 run_delayed_start;
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS);
}
static inline void
btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
{
if (op)
kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
}
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(atomic_read(&ref->refs) == 0);
if (atomic_dec_and_test(&ref->refs)) {
WARN_ON(ref->in_tree);
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
break;
case 0:
kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
break;
default:
BUG();
}
}
}
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
{
mutex_unlock(&head->mutex);
}
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans);
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
*/
static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
{
return node->is_head;
}
/*
* helper functions to cast a node into its container
*/
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_tree_ref, node);
}
static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_data_ref, node);
}
static inline struct btrfs_delayed_ref_head *
btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
{
WARN_ON(!btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_ref_head, node);
}
#endif

944
fs/btrfs/dev-replace.c Normal file
View file

@ -0,0 +1,944 @@
/*
* Copyright (C) STRATO AG 2012. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev);
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
char *srcdev_name,
struct btrfs_device **device);
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
static int btrfs_dev_replace_kthread(void *data);
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
{
struct btrfs_key key;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct extent_buffer *eb;
int slot;
int ret = 0;
struct btrfs_path *path = NULL;
int item_size;
struct btrfs_dev_replace_item *ptr;
u64 src_devid;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
no_valid_dev_replace_entry_found:
ret = 0;
dev_replace->replace_state =
BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED;
dev_replace->cont_reading_from_srcdev_mode =
BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS;
dev_replace->replace_state = 0;
dev_replace->time_started = 0;
dev_replace->time_stopped = 0;
atomic64_set(&dev_replace->num_write_errors, 0);
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
dev_replace->cursor_right = 0;
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
dev_replace->is_valid = 0;
dev_replace->item_needs_writeback = 0;
goto out;
}
slot = path->slots[0];
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, slot);
ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
if (item_size != sizeof(struct btrfs_dev_replace_item)) {
btrfs_warn(fs_info,
"dev_replace entry found has unexpected size, ignore entry");
goto no_valid_dev_replace_entry_found;
}
src_devid = btrfs_dev_replace_src_devid(eb, ptr);
dev_replace->cont_reading_from_srcdev_mode =
btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr);
dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr);
dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr);
dev_replace->time_stopped =
btrfs_dev_replace_time_stopped(eb, ptr);
atomic64_set(&dev_replace->num_write_errors,
btrfs_dev_replace_num_write_errors(eb, ptr));
atomic64_set(&dev_replace->num_uncorrectable_read_errors,
btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr));
dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr);
dev_replace->committed_cursor_left = dev_replace->cursor_left;
dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left;
dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr);
dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 0;
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
dev_replace->srcdev = btrfs_find_device(fs_info, src_devid,
NULL, NULL);
dev_replace->tgtdev = btrfs_find_device(fs_info,
BTRFS_DEV_REPLACE_DEVID,
NULL, NULL);
/*
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing
*/
if (!dev_replace->srcdev &&
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
btrfs_warn(fs_info,
"srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
src_devid);
}
if (!dev_replace->tgtdev &&
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
btrfs_warn(fs_info,
"tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
BTRFS_DEV_REPLACE_DEVID);
}
if (dev_replace->tgtdev) {
if (dev_replace->srcdev) {
dev_replace->tgtdev->total_bytes =
dev_replace->srcdev->total_bytes;
dev_replace->tgtdev->disk_total_bytes =
dev_replace->srcdev->disk_total_bytes;
dev_replace->tgtdev->commit_total_bytes =
dev_replace->srcdev->commit_total_bytes;
dev_replace->tgtdev->bytes_used =
dev_replace->srcdev->bytes_used;
dev_replace->tgtdev->commit_bytes_used =
dev_replace->srcdev->commit_bytes_used;
}
dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1;
btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
dev_replace->tgtdev);
}
break;
}
out:
if (path)
btrfs_free_path(path);
return ret;
}
/*
* called from commit_transaction. Writes changed device replace state to
* disk.
*/
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
int ret;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *eb;
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
btrfs_dev_replace_unlock(dev_replace);
return 0;
}
btrfs_dev_replace_unlock(dev_replace);
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
key.offset = 0;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
btrfs_warn(fs_info, "error %d while searching for dev_replace item!",
ret);
goto out;
}
if (ret == 0 &&
btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
/*
* need to delete old one and insert a new one.
* Since no attempt is made to recover any old state, if the
* dev_replace state is 'running', the data on the target
* drive is lost.
* It would be possible to recover the state: just make sure
* that the beginning of the item is never changed and always
* contains all the essential information. Then read this
* minimal set of information and use it as a base for the
* new state.
*/
ret = btrfs_del_item(trans, dev_root, path);
if (ret != 0) {
btrfs_warn(fs_info, "delete too small dev_replace item failed %d!",
ret);
goto out;
}
ret = 1;
}
if (ret == 1) {
/* need to insert a new item */
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, dev_root, path,
&key, sizeof(*ptr));
if (ret < 0) {
btrfs_warn(fs_info, "insert dev_replace item failed %d!",
ret);
goto out;
}
}
eb = path->nodes[0];
ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item);
btrfs_dev_replace_lock(dev_replace);
if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid);
else
btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1);
btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr,
dev_replace->cont_reading_from_srcdev_mode);
btrfs_set_dev_replace_replace_state(eb, ptr,
dev_replace->replace_state);
btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started);
btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped);
btrfs_set_dev_replace_num_write_errors(eb, ptr,
atomic64_read(&dev_replace->num_write_errors));
btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr,
atomic64_read(&dev_replace->num_uncorrectable_read_errors));
dev_replace->cursor_left_last_write_of_item =
dev_replace->cursor_left;
btrfs_set_dev_replace_cursor_left(eb, ptr,
dev_replace->cursor_left_last_write_of_item);
btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
btrfs_dev_replace_unlock(dev_replace);
btrfs_mark_buffer_dirty(eb);
out:
btrfs_free_path(path);
return ret;
}
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
dev_replace->committed_cursor_left =
dev_replace->cursor_left_last_write_of_item;
}
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args)
{
struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int ret;
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
if (btrfs_fs_incompat(fs_info, RAID56)) {
btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
return -EOPNOTSUPP;
}
switch (args->start.cont_reading_from_srcdev_mode) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
break;
default:
return -EINVAL;
}
if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
args->start.tgtdev_name[0] == '\0')
return -EINVAL;
/*
* Here we commit the transaction to make sure commit_total_bytes
* of all the devices are updated.
*/
trans = btrfs_attach_transaction(root);
if (!IS_ERR(trans)) {
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
} else if (PTR_ERR(trans) != -ENOENT) {
return PTR_ERR(trans);
}
/* the disk copy procedure reuses the scrub code */
mutex_lock(&fs_info->volume_mutex);
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
args->start.srcdev_name,
&src_device);
if (ret) {
mutex_unlock(&fs_info->volume_mutex);
return ret;
}
ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
src_device, &tgt_device);
mutex_unlock(&fs_info->volume_mutex);
if (ret)
return ret;
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
goto leave;
}
dev_replace->cont_reading_from_srcdev_mode =
args->start.cont_reading_from_srcdev_mode;
WARN_ON(!src_device);
dev_replace->srcdev = src_device;
WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name));
/*
* from now on, the writes to the srcdev are all duplicated to
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
dev_replace->time_started = get_seconds();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
dev_replace->cursor_right = 0;
dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 1;
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
btrfs_wait_ordered_roots(root->fs_info, -1);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_dev_replace_lock(dev_replace);
goto leave;
}
ret = btrfs_commit_transaction(trans, root);
WARN_ON(ret);
/* the disk copy procedure reuses the scrub code */
ret = btrfs_scrub_dev(fs_info, src_device->devid, 0,
btrfs_device_get_total_bytes(src_device),
&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(root->fs_info, ret);
WARN_ON(ret);
return 0;
leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
btrfs_dev_replace_unlock(dev_replace);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret;
}
/*
* blocked until all flighting bios are finished.
*/
static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
{
s64 writers;
DEFINE_WAIT(wait);
set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
do {
prepare_to_wait(&fs_info->replace_wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&fs_info->bio_counter);
if (writers)
schedule();
finish_wait(&fs_info->replace_wait, &wait);
} while (writers);
}
/*
* we have removed target device, it is safe to allow new bios request.
*/
static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
{
clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
if (waitqueue_active(&fs_info->replace_wait))
wake_up(&fs_info->replace_wait);
}
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *tgt_device;
struct btrfs_device *src_device;
struct btrfs_root *root = fs_info->tree_root;
u8 uuid_tmp[BTRFS_UUID_SIZE];
struct btrfs_trans_handle *trans;
int ret = 0;
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
/* was the operation canceled, or is it finished? */
if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
btrfs_dev_replace_unlock(dev_replace);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
btrfs_dev_replace_unlock(dev_replace);
/*
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
btrfs_wait_ordered_roots(root->fs_info, -1);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
ret = btrfs_commit_transaction(trans, root);
WARN_ON(ret);
mutex_lock(&uuid_mutex);
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex);
btrfs_dev_replace_lock(dev_replace);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
/* replace old device with new one in mapping tree */
if (!scrub_ret) {
btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
src_device,
tgt_device);
} else {
printk_in_rcu(KERN_ERR
"BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace);
mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s finished\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name));
tgt_device->is_tgtdev_for_dev_replace = 0;
tgt_device->devid = src_device->devid;
src_device->devid = BTRFS_DEV_REPLACE_DEVID;
memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid));
memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid));
btrfs_device_set_total_bytes(tgt_device, src_device->total_bytes);
btrfs_device_set_disk_total_bytes(tgt_device,
src_device->disk_total_bytes);
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
ASSERT(list_empty(&src_device->resized_list));
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
if (fs_info->sb->s_bdev == src_device->bdev)
fs_info->sb->s_bdev = tgt_device->bdev;
if (fs_info->fs_devices->latest_bdev == src_device->bdev)
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
/* replace the sysfs entry */
btrfs_kobj_rm_device(fs_info, src_device);
btrfs_kobj_add_device(fs_info, tgt_device);
btrfs_dev_replace_unlock(dev_replace);
btrfs_rm_dev_replace_blocked(fs_info);
btrfs_rm_dev_replace_srcdev(fs_info, src_device);
btrfs_rm_dev_replace_unblocked(fs_info);
/*
* this is again a consistent state where no dev_replace procedure
* is running, the target device is part of the filesystem, the
* source device is not part of the filesystem anymore and its 1st
* superblock is scratched out so that it is no longer marked to
* belong to this filesystem.
*/
mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
if (!IS_ERR(trans))
btrfs_commit_transaction(trans, root);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev)
{
struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
struct extent_map *em;
struct map_lookup *map;
u64 start = 0;
int i;
write_lock(&em_tree->lock);
do {
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
map = (struct map_lookup *)em->bdev;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
start = em->start + em->len;
free_extent_map(em);
} while (start);
write_unlock(&em_tree->lock);
}
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
char *srcdev_name,
struct btrfs_device **device)
{
int ret;
if (srcdevid) {
ret = 0;
*device = btrfs_find_device(root->fs_info, srcdevid, NULL,
NULL);
if (!*device)
ret = -ENOENT;
} else {
ret = btrfs_find_device_missing_or_by_path(root, srcdev_name,
device);
}
return ret;
}
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *srcdev;
btrfs_dev_replace_lock(dev_replace);
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
args->status.replace_state = dev_replace->replace_state;
args->status.time_started = dev_replace->time_started;
args->status.time_stopped = dev_replace->time_stopped;
args->status.num_write_errors =
atomic64_read(&dev_replace->num_write_errors);
args->status.num_uncorrectable_read_errors =
atomic64_read(&dev_replace->num_uncorrectable_read_errors);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
args->status.progress_1000 = 0;
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
args->status.progress_1000 = 1000;
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
srcdev = dev_replace->srcdev;
args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
div64_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
btrfs_dev_replace_unlock(dev_replace);
}
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
args->result = __btrfs_dev_replace_cancel(fs_info);
return 0;
}
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *tgt_device = NULL;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = fs_info->tree_root;
u64 result;
int ret;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
btrfs_dev_replace_unlock(dev_replace);
goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
tgt_device = dev_replace->tgtdev;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
ret = btrfs_commit_transaction(trans, root);
WARN_ON(ret);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
leave:
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return result;
}
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_info(fs_info, "suspending dev_replace for unmount");
break;
}
btrfs_dev_replace_unlock(dev_replace);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}
/* resume dev_replace procedure that was interrupted by unmount */
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
{
struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
btrfs_dev_replace_unlock(dev_replace);
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
break;
}
if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'");
btrfs_dev_replace_unlock(dev_replace);
return 0;
}
btrfs_dev_replace_unlock(dev_replace);
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
return PTR_ERR_OR_ZERO(task);
}
static int btrfs_dev_replace_kthread(void *data)
{
struct btrfs_fs_info *fs_info = data;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_ioctl_dev_replace_args *status_args;
u64 progress;
status_args = kzalloc(sizeof(*status_args), GFP_NOFS);
if (status_args) {
btrfs_dev_replace_status(fs_info, status_args);
progress = status_args->status.progress_1000;
kfree(status_args);
do_div(progress, 10);
printk_in_rcu(KERN_INFO
"BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
dev_replace->srcdev->missing ? "<missing disk>" :
rcu_str_deref(dev_replace->srcdev->name),
dev_replace->srcdev->devid,
dev_replace->tgtdev ?
rcu_str_deref(dev_replace->tgtdev->name) :
"<missing target disk>",
(unsigned int)progress);
}
btrfs_dev_replace_continue_on_mount(fs_info);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
return 0;
}
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int ret;
ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
dev_replace->committed_cursor_left,
btrfs_device_get_total_bytes(dev_replace->srcdev),
&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(fs_info, ret);
WARN_ON(ret);
return 0;
}
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
{
if (!dev_replace->is_valid)
return 0;
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
/*
* return true even if tgtdev is missing (this is
* something that can happen if the dev_replace
* procedure is suspended by an umount and then
* the tgtdev is missing (or "btrfs dev scan") was
* not called and the the filesystem is remounted
* in degraded state. This does not stop the
* dev_replace procedure. It needs to be canceled
* manually if the cancelation is wanted.
*/
break;
}
return 1;
}
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
{
/* the beginning is just an optimization for the typical case */
if (atomic_read(&dev_replace->nesting_level) == 0) {
acquire_lock:
/* this is not a nested case where the same thread
* is trying to acqurire the same lock twice */
mutex_lock(&dev_replace->lock);
mutex_lock(&dev_replace->lock_management_lock);
dev_replace->lock_owner = current->pid;
atomic_inc(&dev_replace->nesting_level);
mutex_unlock(&dev_replace->lock_management_lock);
return;
}
mutex_lock(&dev_replace->lock_management_lock);
if (atomic_read(&dev_replace->nesting_level) > 0 &&
dev_replace->lock_owner == current->pid) {
WARN_ON(!mutex_is_locked(&dev_replace->lock));
atomic_inc(&dev_replace->nesting_level);
mutex_unlock(&dev_replace->lock_management_lock);
return;
}
mutex_unlock(&dev_replace->lock_management_lock);
goto acquire_lock;
}
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
{
WARN_ON(!mutex_is_locked(&dev_replace->lock));
mutex_lock(&dev_replace->lock_management_lock);
WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
WARN_ON(dev_replace->lock_owner != current->pid);
atomic_dec(&dev_replace->nesting_level);
if (atomic_read(&dev_replace->nesting_level) == 0) {
dev_replace->lock_owner = 0;
mutex_unlock(&dev_replace->lock_management_lock);
mutex_unlock(&dev_replace->lock);
} else {
mutex_unlock(&dev_replace->lock_management_lock);
}
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
{
percpu_counter_inc(&fs_info->bio_counter);
}
void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
{
percpu_counter_dec(&fs_info->bio_counter);
if (waitqueue_active(&fs_info->replace_wait))
wake_up(&fs_info->replace_wait);
}
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
{
DEFINE_WAIT(wait);
again:
percpu_counter_inc(&fs_info->bio_counter);
if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
btrfs_bio_counter_dec(fs_info);
wait_event(fs_info->replace_wait,
!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
&fs_info->fs_state));
goto again;
}
}

44
fs/btrfs/dev-replace.h Normal file
View file

@ -0,0 +1,44 @@
/*
* Copyright (C) STRATO AG 2012. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#if !defined(__BTRFS_DEV_REPLACE__)
#define __BTRFS_DEV_REPLACE__
struct btrfs_ioctl_dev_replace_args;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{
atomic64_inc(stat_value);
}
#endif

486
fs/btrfs/dir-item.c Normal file
View file

@ -0,0 +1,486 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len);
/*
* insert a name into a directory, doing overflow properly if there is a hash
* collision. data_size indicates how big the item inserted should be. On
* success a struct btrfs_dir_item pointer is returned, otherwise it is
* an ERR_PTR.
*
* The name is not copied into the dir item, you have to do that yourself.
*/
static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
*trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *cpu_key,
u32 data_size,
const char *name,
int name_len)
{
int ret;
char *ptr;
struct btrfs_item *item;
struct extent_buffer *leaf;
ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
if (ret == -EEXIST) {
struct btrfs_dir_item *di;
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di)
return ERR_PTR(-EEXIST);
btrfs_extend_item(root, path, data_size);
} else if (ret < 0)
return ERR_PTR(ret);
WARN_ON(ret > 0);
leaf = path->nodes[0];
item = btrfs_item_nr(path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
BUG_ON(data_size > btrfs_item_size(leaf, item));
ptr += btrfs_item_size(leaf, item) - data_size;
return (struct btrfs_dir_item *)ptr;
}
/*
* xattrs work a lot like directories, this inserts an xattr item
* into the tree
*/
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
const char *name, u16 name_len,
const void *data, u16 data_len)
{
int ret = 0;
struct btrfs_dir_item *dir_item;
unsigned long name_ptr, data_ptr;
struct btrfs_key key, location;
struct btrfs_disk_key disk_key;
struct extent_buffer *leaf;
u32 data_size;
BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
key.objectid = objectid;
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
if (IS_ERR(dir_item))
return PTR_ERR(dir_item);
memset(&location, 0, sizeof(location));
leaf = path->nodes[0];
btrfs_cpu_key_to_disk(&disk_key, &location);
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
btrfs_set_dir_name_len(leaf, dir_item, name_len);
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
btrfs_set_dir_data_len(leaf, dir_item, data_len);
name_ptr = (unsigned long)(dir_item + 1);
data_ptr = (unsigned long)((char *)name_ptr + name_len);
write_extent_buffer(leaf, name, name_ptr, name_len);
write_extent_buffer(leaf, data, data_ptr, data_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
return ret;
}
/*
* insert a directory item in the tree, doing all the magic for
* both indexes. 'dir' indicates which objectid to insert it into,
* 'location' is the key to stuff into the directory item, 'type' is the
* type of the inode we're pointing to, and 'index' is the sequence number
* to use for the second index (if one is created).
* Will return 0 or -ENOMEM
*/
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, const char *name, int name_len,
struct inode *dir, struct btrfs_key *location,
u8 type, u64 index)
{
int ret = 0;
int ret2 = 0;
struct btrfs_path *path;
struct btrfs_dir_item *dir_item;
struct extent_buffer *leaf;
unsigned long name_ptr;
struct btrfs_key key;
struct btrfs_disk_key disk_key;
u32 data_size;
key.objectid = btrfs_ino(dir);
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
btrfs_cpu_key_to_disk(&disk_key, location);
data_size = sizeof(*dir_item) + name_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
if (ret == -EEXIST)
goto second_insert;
goto out_free;
}
leaf = path->nodes[0];
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
btrfs_set_dir_type(leaf, dir_item, type);
btrfs_set_dir_data_len(leaf, dir_item, 0);
btrfs_set_dir_name_len(leaf, dir_item, name_len);
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
name_ptr = (unsigned long)(dir_item + 1);
write_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
second_insert:
/* FIXME, use some real flag for selecting the extra index */
if (root == root->fs_info->tree_root) {
ret = 0;
goto out_free;
}
btrfs_release_path(path);
ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
&disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
return ret;
if (ret2)
return ret2;
return 0;
}
/*
* lookup a directory item based on name. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
* item (use mod < 0) or changing the options (use mod > 0)
*/
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
const char *name, int name_len,
int mod)
{
int ret;
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
}
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const char *name, int name_len)
{
int ret;
struct btrfs_key key;
struct btrfs_dir_item *di;
int data_size;
struct extent_buffer *leaf;
int slot;
struct btrfs_path *path;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* return back any errors */
if (ret < 0)
goto out;
/* nothing found, we're safe */
if (ret > 0) {
ret = 0;
goto out;
}
/* we found an item, look for our name in the item */
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di) {
/* our exact name was found */
ret = -EEXIST;
goto out;
}
/*
* see if there is room in the item to insert this
* name
*/
data_size = sizeof(*di) + name_len;
leaf = path->nodes[0];
slot = path->slots[0];
if (data_size + btrfs_item_size_nr(leaf, slot) +
sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) {
ret = -EOVERFLOW;
} else {
/* plenty of insertion room */
ret = 0;
}
out:
btrfs_free_path(path);
return ret;
}
/*
* lookup a directory item based on index. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
* item (use mod < 0) or changing the options (use mod > 0)
*
* The name is used to make sure the index really points to the name you were
* looking for.
*/
struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
u64 objectid, const char *name, int name_len,
int mod)
{
int ret;
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = objectid;
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return ERR_PTR(-ENOENT);
return btrfs_match_dir_item_name(root, path, name, name_len);
}
struct btrfs_dir_item *
btrfs_search_dir_index_item(struct btrfs_root *root,
struct btrfs_path *path, u64 dirid,
const char *name, int name_len)
{
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
u32 nritems;
int ret;
key.objectid = dirid;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ERR_PTR(ret);
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
while (1) {
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
break;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
continue;
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
break;
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di)
return di;
path->slots[0]++;
}
return NULL;
}
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
const char *name, u16 name_len,
int mod)
{
int ret;
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
}
/*
* helper function to look at the directory item pointed to by 'path'
* this walks through all the entries in a dir item and finds one
* for a specific name.
*/
static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len)
{
struct btrfs_dir_item *dir_item;
unsigned long name_ptr;
u32 total_len;
u32 cur = 0;
u32 this_len;
struct extent_buffer *leaf;
leaf = path->nodes[0];
dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
if (verify_dir_item(root, leaf, dir_item))
return NULL;
total_len = btrfs_item_size_nr(leaf, path->slots[0]);
while (cur < total_len) {
this_len = sizeof(*dir_item) +
btrfs_dir_name_len(leaf, dir_item) +
btrfs_dir_data_len(leaf, dir_item);
name_ptr = (unsigned long)(dir_item + 1);
if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
return dir_item;
cur += this_len;
dir_item = (struct btrfs_dir_item *)((char *)dir_item +
this_len);
}
return NULL;
}
/*
* given a pointer into a directory item, delete it. This
* handles items that have more than one entry in them.
*/
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_dir_item *di)
{
struct extent_buffer *leaf;
u32 sub_item_len;
u32 item_len;
int ret = 0;
leaf = path->nodes[0];
sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
btrfs_dir_data_len(leaf, di);
item_len = btrfs_item_size_nr(leaf, path->slots[0]);
if (sub_item_len == item_len) {
ret = btrfs_del_item(trans, root, path);
} else {
/* MARKER */
unsigned long ptr = (unsigned long)di;
unsigned long start;
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_len - (ptr + sub_item_len - start));
btrfs_truncate_item(root, path, item_len - sub_item_len, 1);
}
return ret;
}
int verify_dir_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_dir_item *dir_item)
{
u16 namelen = BTRFS_NAME_LEN;
u8 type = btrfs_dir_type(leaf, dir_item);
if (type >= BTRFS_FT_MAX) {
btrfs_crit(root->fs_info, "invalid dir item type: %d",
(int)type);
return 1;
}
if (type == BTRFS_FT_XATTR)
namelen = XATTR_NAME_MAX;
if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
btrfs_crit(root->fs_info, "invalid dir item name len: %u",
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
if ((btrfs_dir_data_len(leaf, dir_item) +
btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u",
(unsigned)btrfs_dir_name_len(leaf, dir_item),
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
return 0;
}

4240
fs/btrfs/disk-io.c Normal file

File diff suppressed because it is too large Load diff

159
fs/btrfs/disk-io.h Normal file
View file

@ -0,0 +1,159 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __DISKIO__
#define __DISKIO__
#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
#define BTRFS_SUPER_INFO_SIZE 4096
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12
enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_DATA = 0,
BTRFS_WQ_ENDIO_METADATA = 1,
BTRFS_WQ_ENDIO_FREE_SPACE = 2,
BTRFS_WQ_ENDIO_RAID56 = 3,
BTRFS_WQ_ENDIO_DIO_REPAIR = 4,
};
static inline u64 btrfs_sb_offset(int mirror)
{
u64 start = 16 * 1024;
if (mirror)
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
return BTRFS_SUPER_INFO_OFFSET;
}
struct btrfs_device;
struct btrfs_fs_devices;
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u64 parent_transid);
void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize);
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
int mirror_num, struct extent_buffer **eb);
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
void clean_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
void close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
struct btrfs_key *location);
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
bool check_ref);
static inline struct btrfs_root *
btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location)
{
return btrfs_get_fs_root(fs_info, location, true);
}
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(void);
#endif
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
*
* If you want to ensure the whole tree is safe, you should use
* fs_info->subvol_srcu
*/
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
{
if (atomic_inc_not_zero(&root->refs))
return root;
return NULL;
}
static inline void btrfs_put_fs_root(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->refs))
kfree(root);
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
int btrfs_calc_num_tolerated_disk_barrier_failures(
struct btrfs_fs_info *fs_info);
int __init btrfs_end_io_wq_init(void);
void btrfs_end_io_wq_exit(void);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level);
#else
static inline void btrfs_init_lockdep(void)
{ }
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level)
{
}
#endif
#endif

304
fs/btrfs/export.c Normal file
View file

@ -0,0 +1,304 @@
#include <linux/fs.h>
#include <linux/types.h>
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
parent_objectid) / 4)
#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \
parent_root_objectid) / 4)
#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4)
static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
struct inode *parent)
{
struct btrfs_fid *fid = (struct btrfs_fid *)fh;
int len = *max_len;
int type;
if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
*max_len = BTRFS_FID_SIZE_CONNECTABLE;
return FILEID_INVALID;
} else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
*max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
return FILEID_INVALID;
}
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
fid->objectid = btrfs_ino(inode);
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
if (parent) {
u64 parent_root_id;
fid->parent_objectid = BTRFS_I(parent)->location.objectid;
fid->parent_gen = parent->i_generation;
parent_root_id = BTRFS_I(parent)->root->objectid;
if (parent_root_id != fid->root_objectid) {
fid->parent_root_objectid = parent_root_id;
len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
type = FILEID_BTRFS_WITH_PARENT_ROOT;
} else {
len = BTRFS_FID_SIZE_CONNECTABLE;
type = FILEID_BTRFS_WITH_PARENT;
}
}
*max_len = len;
return type;
}
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
u64 root_objectid, u32 generation,
int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
struct inode *inode;
struct btrfs_key key;
int index;
int err = 0;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
index = srcu_read_lock(&fs_info->subvol_srcu);
root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto fail;
}
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, root, NULL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail;
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
if (check_generation && generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
}
return d_obtain_alias(inode);
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
return ERR_PTR(err);
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
int fh_len, int fh_type)
{
struct btrfs_fid *fid = (struct btrfs_fid *) fh;
u64 objectid, root_objectid;
u32 generation;
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
return NULL;
root_objectid = fid->root_objectid;
} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
return NULL;
root_objectid = fid->parent_root_objectid;
} else
return NULL;
objectid = fid->parent_objectid;
generation = fid->parent_gen;
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
int fh_len, int fh_type)
{
struct btrfs_fid *fid = (struct btrfs_fid *) fh;
u64 objectid, root_objectid;
u32 generation;
if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
(fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
(fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
return NULL;
objectid = fid->objectid;
root_objectid = fid->root_objectid;
generation = fid->gen;
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_root_ref *ref;
struct btrfs_key key;
struct btrfs_key found_key;
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = root->fs_info->tree_root;
} else {
key.objectid = btrfs_ino(dir);
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
BUG_ON(ret == 0); /* Key with offset of -1 found */
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
}
path->slots[0]--;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != key.objectid || found_key.type != key.type) {
ret = -ENOENT;
goto fail;
}
if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
key.objectid = btrfs_root_ref_dirid(leaf, ref);
} else {
key.objectid = found_key.offset;
}
btrfs_free_path(path);
if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
return btrfs_get_dentry(root->fs_info->sb, key.objectid,
found_key.offset, 0, 0);
}
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
}
static int btrfs_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct inode *inode = child->d_inode;
struct inode *dir = parent->d_inode;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode_ref *iref;
struct btrfs_root_ref *rref;
struct extent_buffer *leaf;
unsigned long name_ptr;
struct btrfs_key key;
int name_len;
int ret;
u64 ino;
if (!dir || !inode)
return -EINVAL;
if (!S_ISDIR(dir->i_mode))
return -EINVAL;
ino = btrfs_ino(inode);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
if (ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = root->fs_info->tree_root;
} else {
key.objectid = ino;
key.offset = btrfs_ino(dir);
key.type = BTRFS_INODE_REF_KEY;
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
btrfs_free_path(path);
return ret;
} else if (ret > 0) {
if (ino == BTRFS_FIRST_FREE_OBJECTID) {
path->slots[0]--;
} else {
btrfs_free_path(path);
return -ENOENT;
}
}
leaf = path->nodes[0];
if (ino == BTRFS_FIRST_FREE_OBJECTID) {
rref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
name_ptr = (unsigned long)(rref + 1);
name_len = btrfs_root_ref_name_len(leaf, rref);
} else {
iref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_ref);
name_ptr = (unsigned long)(iref + 1);
name_len = btrfs_inode_ref_name_len(leaf, iref);
}
read_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_free_path(path);
/*
* have to add the null termination to make sure that reconnect_path
* gets the right len for strlen
*/
name[name_len] = '\0';
return 0;
}
const struct export_operations btrfs_export_ops = {
.encode_fh = btrfs_encode_fh,
.fh_to_dentry = btrfs_fh_to_dentry,
.fh_to_parent = btrfs_fh_to_parent,
.get_parent = btrfs_get_parent,
.get_name = btrfs_get_name,
};

19
fs/btrfs/export.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef BTRFS_EXPORT_H
#define BTRFS_EXPORT_H
#include <linux/exportfs.h>
extern const struct export_operations btrfs_export_ops;
struct btrfs_fid {
u64 objectid;
u64 root_objectid;
u32 gen;
u64 parent_objectid;
u32 parent_gen;
u64 parent_root_objectid;
} __attribute__ ((packed));
#endif

9693
fs/btrfs/extent-tree.c Normal file

File diff suppressed because it is too large Load diff

5558
fs/btrfs/extent_io.c Normal file

File diff suppressed because it is too large Load diff

380
fs/btrfs/extent_io.h Normal file
View file

@ -0,0 +1,380 @@
#ifndef __EXTENTIO__
#define __EXTENTIO__
#include <linux/rbtree.h>
/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_DAMAGED (1 << 14)
#define EXTENT_NORESERVE (1 << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/*
* flags for bio submission. The high bits indicate the compression
* type for this bio
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
#define EXTENT_BUFFER_UPTODATE 0
#define EXTENT_BUFFER_DIRTY 2
#define EXTENT_BUFFER_CORRUPT 3
#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
#define EXTENT_BUFFER_TREE_REF 5
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */
#define EXTENT_BUFFER_DUMMY 9
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
/* these are flags for extent_clear_unlock_delalloc */
#define PAGE_UNLOCK (1 << 0)
#define PAGE_CLEAR_DIRTY (1 << 1)
#define PAGE_SET_WRITEBACK (1 << 2)
#define PAGE_END_WRITEBACK (1 << 3)
#define PAGE_SET_PRIVATE2 (1 << 4)
/*
* page->private values. Every page that is controlled by the extent
* map has page->private set to one.
*/
#define EXTENT_PAGE_PRIVATE 1
struct extent_state;
struct btrfs_root;
struct btrfs_io_bio;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 bio_offset);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook;
int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits);
void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits);
void (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
void (*split_extent_hook)(struct inode *inode,
struct extent_state *orig, u64 split);
};
struct extent_io_tree {
struct rb_root state;
struct address_space *mapping;
u64 dirty_bytes;
int track_uptodate;
spinlock_t lock;
struct extent_io_ops *ops;
};
struct extent_state {
u64 start;
u64 end; /* inclusive */
struct rb_node rb_node;
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
atomic_t refs;
unsigned long state;
/* for use by the FS */
u64 private;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
#define INLINE_EXTENT_BUFFER_PAGES 16
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
struct extent_buffer {
u64 start;
unsigned long len;
unsigned long bflags;
struct btrfs_fs_info *fs_info;
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
int read_mirror;
struct rcu_head rcu_head;
pid_t lock_owner;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
short lock_nested;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
short log_index;
/* protects write locks */
rwlock_t lock;
/* readers use lock_wq while they wait for the write
* lock holders to unlock
*/
wait_queue_head_t write_lock_wq;
/* writers use read_lock_wq while they wait for readers
* to unlock
*/
wait_queue_head_t read_lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
int compress_type)
{
*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
}
static inline int extent_compress_type(unsigned long bio_flags)
{
return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}
struct extent_map_tree;
typedef struct extent_map *(get_extent_t)(struct inode *inode,
struct page *page,
size_t pg_offset,
u64 start, u64 len,
int create);
void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping);
int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, struct extent_state **cached);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached, gfp_t mask);
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int filled,
struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long clear_bits,
struct extent_state **cached_state, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned long bits,
struct extent_state **cached_state);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc);
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
u64 start, u64 end, get_extent_t *get_extent,
int mode);
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages,
get_extent_t get_extent);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, u64 start, int wait,
get_extent_t *get_extent, int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
(start >> PAGE_CACHE_SHIFT);
}
static inline void extent_buffer_get(struct extent_buffer *eb)
{
atomic_inc(&eb->refs);
}
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
unsigned long start,
unsigned long len);
void read_extent_buffer(struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
unsigned long start,
unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_uptodate(struct extent_buffer *eb);
int clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
unsigned long *map_len);
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned long bits_to_clear,
unsigned long page_ops);
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
struct btrfs_fs_info;
int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
struct page *page, unsigned int pg_offset,
int mirror_num);
int clean_io_failure(struct inode *inode, u64 start, struct page *page,
unsigned int pg_offset);
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
* mirrors. If another mirror has good data, the page is set up to date
* and things continue. If a good mirror can't be found, the original
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
struct page *page;
u64 start;
u64 len;
u64 logical;
unsigned long bio_flags;
int this_mirror;
int failed_mirror;
int in_validation;
};
void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec, int fail_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
int free_io_failure(struct inode *inode, struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end, u64 max_bytes);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
#endif

455
fs/btrfs/extent_map.c Normal file
View file

@ -0,0 +1,455 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include "ctree.h"
#include "extent_map.h"
static struct kmem_cache *extent_map_cache;
int __init extent_map_init(void)
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
sizeof(struct extent_map), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
}
void extent_map_exit(void)
{
if (extent_map_cache)
kmem_cache_destroy(extent_map_cache);
}
/**
* extent_map_tree_init - initialize extent map tree
* @tree: tree to initialize
*
* Initialize the extent tree @tree. Should be called for each new inode
* or other user of the extent_map interface.
*/
void extent_map_tree_init(struct extent_map_tree *tree)
{
tree->map = RB_ROOT;
INIT_LIST_HEAD(&tree->modified_extents);
rwlock_init(&tree->lock);
}
/**
* alloc_extent_map - allocate new extent map structure
*
* Allocate a new extent_map structure. The new structure is
* returned with a reference count of one and needs to be
* freed using free_extent_map()
*/
struct extent_map *alloc_extent_map(void)
{
struct extent_map *em;
em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
if (!em)
return NULL;
RB_CLEAR_NODE(&em->rb_node);
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
em->generation = 0;
atomic_set(&em->refs, 1);
INIT_LIST_HEAD(&em->list);
return em;
}
/**
* free_extent_map - drop reference count of an extent_map
* @em: extent map beeing releasead
*
* Drops the reference out on @em by one and free the structure
* if the reference count hits zero.
*/
void free_extent_map(struct extent_map *em)
{
if (!em)
return;
WARN_ON(atomic_read(&em->refs) == 0);
if (atomic_dec_and_test(&em->refs)) {
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
kfree(em->bdev);
kmem_cache_free(extent_map_cache, em);
}
}
/* simple helper to do math around the end of an extent, handling wrap */
static u64 range_end(u64 start, u64 len)
{
if (start + len < start)
return (u64)-1;
return start + len;
}
static int tree_insert(struct rb_root *root, struct extent_map *em)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct extent_map *entry = NULL;
struct rb_node *orig_parent = NULL;
u64 end = range_end(em->start, em->len);
while (*p) {
parent = *p;
entry = rb_entry(parent, struct extent_map, rb_node);
if (em->start < entry->start)
p = &(*p)->rb_left;
else if (em->start >= extent_map_end(entry))
p = &(*p)->rb_right;
else
return -EEXIST;
}
orig_parent = parent;
while (parent && em->start >= extent_map_end(entry)) {
parent = rb_next(parent);
entry = rb_entry(parent, struct extent_map, rb_node);
}
if (parent)
if (end > entry->start && em->start < extent_map_end(entry))
return -EEXIST;
parent = orig_parent;
entry = rb_entry(parent, struct extent_map, rb_node);
while (parent && em->start < entry->start) {
parent = rb_prev(parent);
entry = rb_entry(parent, struct extent_map, rb_node);
}
if (parent)
if (end > entry->start && em->start < extent_map_end(entry))
return -EEXIST;
rb_link_node(&em->rb_node, orig_parent, p);
rb_insert_color(&em->rb_node, root);
return 0;
}
/*
* search through the tree for an extent_map with a given offset. If
* it can't be found, try to find some neighboring extents
*/
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
struct rb_node **prev_ret,
struct rb_node **next_ret)
{
struct rb_node *n = root->rb_node;
struct rb_node *prev = NULL;
struct rb_node *orig_prev = NULL;
struct extent_map *entry;
struct extent_map *prev_entry = NULL;
while (n) {
entry = rb_entry(n, struct extent_map, rb_node);
prev = n;
prev_entry = entry;
if (offset < entry->start)
n = n->rb_left;
else if (offset >= extent_map_end(entry))
n = n->rb_right;
else
return n;
}
if (prev_ret) {
orig_prev = prev;
while (prev && offset >= extent_map_end(prev_entry)) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*prev_ret = prev;
prev = orig_prev;
}
if (next_ret) {
prev_entry = rb_entry(prev, struct extent_map, rb_node);
while (prev && offset < prev_entry->start) {
prev = rb_prev(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*next_ret = prev;
}
return NULL;
}
/* check to see if two extent_map structs are adjacent and safe to merge */
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
return 0;
/*
* don't merge compressed extents, we need to know their
* actual size
*/
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
return 0;
if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
test_bit(EXTENT_FLAG_LOGGING, &next->flags))
return 0;
/*
* We don't want to merge stuff that hasn't been written to the log yet
* since it may not reflect exactly what is on disk, and that would be
* bad.
*/
if (!list_empty(&prev->list) || !list_empty(&next->list))
return 0;
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
((next->block_start == EXTENT_MAP_HOLE &&
prev->block_start == EXTENT_MAP_HOLE) ||
(next->block_start == EXTENT_MAP_INLINE &&
prev->block_start == EXTENT_MAP_INLINE) ||
(next->block_start == EXTENT_MAP_DELALLOC &&
prev->block_start == EXTENT_MAP_DELALLOC) ||
(next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
next->block_start == extent_map_block_end(prev)))) {
return 1;
}
return 0;
}
static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
{
struct extent_map *merge = NULL;
struct rb_node *rb;
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
if (rb)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(merge, em)) {
em->start = merge->start;
em->orig_start = merge->orig_start;
em->len += merge->len;
em->block_len += merge->block_len;
em->block_start = merge->block_start;
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
em->mod_start = merge->mod_start;
em->generation = max(em->generation, merge->generation);
rb_erase(&merge->rb_node, &tree->map);
RB_CLEAR_NODE(&merge->rb_node);
free_extent_map(merge);
}
}
rb = rb_next(&em->rb_node);
if (rb)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(em, merge)) {
em->len += merge->len;
em->block_len += merge->block_len;
rb_erase(&merge->rb_node, &tree->map);
RB_CLEAR_NODE(&merge->rb_node);
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
em->generation = max(em->generation, merge->generation);
free_extent_map(merge);
}
}
/**
* unpin_extent_cache - unpin an extent from the cache
* @tree: tree to unpin the extent in
* @start: logical offset in the file
* @len: length of the extent
* @gen: generation that this extent has been modified in
*
* Called after an extent has been written to disk properly. Set the generation
* to the generation that actually added the file item to the inode so we know
* we need to sync this extent when we call fsync().
*/
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
u64 gen)
{
int ret = 0;
struct extent_map *em;
bool prealloc = false;
write_lock(&tree->lock);
em = lookup_extent_mapping(tree, start, len);
WARN_ON(!em || em->start != start);
if (!em)
goto out;
em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start;
em->mod_len = em->len;
if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
prealloc = true;
clear_bit(EXTENT_FLAG_FILLING, &em->flags);
}
try_merge_map(tree, em);
if (prealloc) {
em->mod_start = em->start;
em->mod_len = em->len;
}
free_extent_map(em);
out:
write_unlock(&tree->lock);
return ret;
}
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
if (extent_map_in_tree(em))
try_merge_map(tree, em);
}
static inline void setup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em,
int modified)
{
atomic_inc(&em->refs);
em->mod_start = em->start;
em->mod_len = em->len;
if (modified)
list_move(&em->list, &tree->modified_extents);
else
try_merge_map(tree, em);
}
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
* @em: map to insert
*
* Insert @em into @tree or perform a simple forward/backward merge with
* existing mappings. The extent_map struct passed in will be inserted
* into the tree directly, with an additional reference taken, or a
* reference dropped if the merge attempt was successful.
*/
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified)
{
int ret = 0;
ret = tree_insert(&tree->map, em);
if (ret)
goto out;
setup_extent_mapping(tree, em, modified);
out:
return ret;
}
static struct extent_map *
__lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len, int strict)
{
struct extent_map *em;
struct rb_node *rb_node;
struct rb_node *prev = NULL;
struct rb_node *next = NULL;
u64 end = range_end(start, len);
rb_node = __tree_search(&tree->map, start, &prev, &next);
if (!rb_node) {
if (prev)
rb_node = prev;
else if (next)
rb_node = next;
else
return NULL;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
if (strict && !(end > em->start && start < extent_map_end(em)))
return NULL;
atomic_inc(&em->refs);
return em;
}
/**
* lookup_extent_mapping - lookup extent_map
* @tree: tree to lookup in
* @start: byte offset to start the search
* @len: length of the lookup range
*
* Find and return the first extent_map struct in @tree that intersects the
* [start, len] range. There may be additional objects in the tree that
* intersect, so check the object returned carefully to make sure that no
* additional lookups are needed.
*/
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
return __lookup_extent_mapping(tree, start, len, 1);
}
/**
* search_extent_mapping - find a nearby extent map
* @tree: tree to lookup in
* @start: byte offset to start the search
* @len: length of the lookup range
*
* Find and return the first extent_map struct in @tree that intersects the
* [start, len] range.
*
* If one can't be found, any nearby extent may be returned
*/
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
return __lookup_extent_mapping(tree, start, len, 0);
}
/**
* remove_extent_mapping - removes an extent_map from the extent tree
* @tree: extent tree to remove from
* @em: extent map beeing removed
*
* Removes @em from @tree. No reference counts are dropped, and no checks
* are done to see if the range is in use
*/
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
int ret = 0;
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
rb_erase(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_del_init(&em->list);
RB_CLEAR_NODE(&em->rb_node);
return ret;
}
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
{
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
ASSERT(extent_map_in_tree(cur));
if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
list_del_init(&cur->list);
rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
RB_CLEAR_NODE(&cur->rb_node);
setup_extent_mapping(tree, new, modified);
}

85
fs/btrfs/extent_map.h Normal file
View file

@ -0,0 +1,85 @@
#ifndef __EXTENTMAP__
#define __EXTENTMAP__
#include <linux/rbtree.h>
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
#define EXTENT_MAP_INLINE ((u64)-2)
#define EXTENT_MAP_DELALLOC ((u64)-1)
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
#define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */
struct extent_map {
struct rb_node rb_node;
/* all of these are in bytes */
u64 start;
u64 len;
u64 mod_start;
u64 mod_len;
u64 orig_start;
u64 orig_block_len;
u64 ram_bytes;
u64 block_start;
u64 block_len;
u64 generation;
unsigned long flags;
struct block_device *bdev;
atomic_t refs;
unsigned int compress_type;
struct list_head list;
};
struct extent_map_tree {
struct rb_root map;
struct list_head modified_extents;
rwlock_t lock;
};
static inline int extent_map_in_tree(const struct extent_map *em)
{
return !RB_EMPTY_NODE(&em->rb_node);
}
static inline u64 extent_map_end(struct extent_map *em)
{
if (em->start + em->len < em->start)
return (u64)-1;
return em->start + em->len;
}
static inline u64 extent_map_block_end(struct extent_map *em)
{
if (em->block_start + em->block_len < em->block_start)
return (u64)-1;
return em->block_start + em->block_len;
}
void extent_map_tree_init(struct extent_map_tree *tree);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
#endif

953
fs/btrfs/file-item.c Normal file
View file

@ -0,0 +1,953 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "print-tree.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
size) - 1))
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
PAGE_CACHE_SIZE))
#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
sizeof(struct btrfs_ordered_sum)) / \
sizeof(u32) * (r)->sectorsize)
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
u64 disk_offset, u64 disk_num_bytes,
u64 num_bytes, u64 offset, u64 ram_bytes,
u8 compression, u8 encryption, u16 other_encoding)
{
int ret = 0;
struct btrfs_file_extent_item *item;
struct btrfs_key file_key;
struct btrfs_path *path;
struct extent_buffer *leaf;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
file_key.objectid = objectid;
file_key.offset = pos;
file_key.type = BTRFS_EXTENT_DATA_KEY;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &file_key,
sizeof(*item));
if (ret < 0)
goto out;
BUG_ON(ret); /* Can't happen */
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
btrfs_set_file_extent_offset(leaf, item, offset);
btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
btrfs_set_file_extent_generation(leaf, item, trans->transid);
btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
btrfs_set_file_extent_compression(leaf, item, compression);
btrfs_set_file_extent_encryption(leaf, item, encryption);
btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
btrfs_mark_buffer_dirty(leaf);
out:
btrfs_free_path(path);
return ret;
}
static struct btrfs_csum_item *
btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, int cow)
{
int ret;
struct btrfs_key file_key;
struct btrfs_key found_key;
struct btrfs_csum_item *item;
struct extent_buffer *leaf;
u64 csum_offset = 0;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
int csums_in_item;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = bytenr;
file_key.type = BTRFS_EXTENT_CSUM_KEY;
ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
if (ret < 0)
goto fail;
leaf = path->nodes[0];
if (ret > 0) {
ret = 1;
if (path->slots[0] == 0)
goto fail;
path->slots[0]--;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
goto fail;
csum_offset = (bytenr - found_key.offset) >>
root->fs_info->sb->s_blocksize_bits;
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
csums_in_item /= csum_size;
if (csum_offset == csums_in_item) {
ret = -EFBIG;
goto fail;
} else if (csum_offset > csums_in_item) {
goto fail;
}
}
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item = (struct btrfs_csum_item *)((unsigned char *)item +
csum_offset * csum_size);
return item;
fail:
if (ret > 0)
ret = -ENOENT;
return ERR_PTR(ret);
}
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
u64 offset, int mod)
{
int ret;
struct btrfs_key file_key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
file_key.objectid = objectid;
file_key.offset = offset;
file_key.type = BTRFS_EXTENT_DATA_KEY;
ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
return ret;
}
static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
{
kfree(bio->csum_allocated);
}
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
struct bio_vec *bvec = bio->bi_io_vec;
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_path *path;
u8 *csum;
u64 offset = 0;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
u32 diff;
int nblocks;
int bio_index = 0;
int count;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
return -ENOMEM;
}
btrfs_bio->csum = btrfs_bio->csum_allocated;
btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
} else {
btrfs_bio->csum = btrfs_bio->csum_inline;
}
csum = btrfs_bio->csum;
} else {
csum = (u8 *)dst;
}
if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
path->reada = 2;
WARN_ON(bio->bi_vcnt <= 0);
/*
* the free space stuff is only read when it hasn't been
* updated in the current transaction. So, we can safely
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(inode)) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
(u32 *)csum, nblocks);
if (count)
goto found;
if (!item || disk_bytenr < item_start_offset ||
disk_bytenr >= item_last_offset) {
struct btrfs_key found_key;
u32 item_size;
if (item)
btrfs_release_path(path);
item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
path, disk_bytenr, 0);
if (IS_ERR(item)) {
count = 1;
memset(csum, 0, csum_size);
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
offset + bvec->bv_len - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
btrfs_info(BTRFS_I(inode)->root->fs_info,
"no csum found for inode %llu start %llu",
btrfs_ino(inode), offset);
}
item = NULL;
btrfs_release_path(path);
goto found;
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
item_start_offset = found_key.offset;
item_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
item_last_offset = item_start_offset +
(item_size / csum_size) *
root->sectorsize;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_csum_item);
}
/*
* this byte range must be able to fit inside
* a single leaf so it will also fit inside a u32
*/
diff = disk_bytenr - item_start_offset;
diff = diff / root->sectorsize;
diff = diff * csum_size;
count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
inode->i_sb->s_blocksize_bits);
read_extent_buffer(path->nodes[0], csum,
((unsigned long)item) + diff,
csum_size * count);
found:
csum += count * csum_size;
nblocks -= count;
bio_index += count;
while (count--) {
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
bvec++;
}
}
btrfs_free_path(path);
return 0;
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 offset)
{
return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit)
{
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_ordered_sum *sums;
struct btrfs_csum_item *item;
LIST_HEAD(tmplist);
unsigned long offset;
int ret;
size_t size;
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
ASSERT(IS_ALIGNED(start, root->sectorsize) &&
IS_ALIGNED(end + 1, root->sectorsize));
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (search_commit) {
path->skip_locking = 1;
path->reada = 2;
path->search_commit_root = 1;
}
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.offset = start;
key.type = BTRFS_EXTENT_CSUM_KEY;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
if (ret > 0 && path->slots[0] > 0) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
key.type == BTRFS_EXTENT_CSUM_KEY) {
offset = (start - key.offset) >>
root->fs_info->sb->s_blocksize_bits;
if (offset * csum_size <
btrfs_item_size_nr(leaf, path->slots[0] - 1))
path->slots[0]--;
}
}
while (start <= end) {
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto fail;
if (ret > 0)
break;
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset > end)
break;
if (key.offset > start)
start = key.offset;
size = btrfs_item_size_nr(leaf, path->slots[0]);
csum_end = key.offset + (size / csum_size) * root->sectorsize;
if (csum_end <= start) {
path->slots[0]++;
continue;
}
csum_end = min(csum_end, end + 1);
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_csum_item);
while (start < csum_end) {
size = min_t(size_t, csum_end - start,
MAX_ORDERED_SUM_BYTES(root));
sums = kzalloc(btrfs_ordered_sum_size(root, size),
GFP_NOFS);
if (!sums) {
ret = -ENOMEM;
goto fail;
}
sums->bytenr = start;
sums->len = (int)size;
offset = (start - key.offset) >>
root->fs_info->sb->s_blocksize_bits;
offset *= csum_size;
size >>= root->fs_info->sb->s_blocksize_bits;
read_extent_buffer(path->nodes[0],
sums->sums,
((unsigned long)item) + offset,
csum_size * size);
start += root->sectorsize * size;
list_add_tail(&sums->list, &tmplist);
}
path->slots[0]++;
}
ret = 0;
fail:
while (ret < 0 && !list_empty(&tmplist)) {
sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
list_splice_tail(&tmplist, list);
btrfs_free_path(path);
return ret;
}
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig)
{
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered;
char *data;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
int index;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
return -ENOMEM;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
if (contig)
offset = file_start;
else
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
index = 0;
while (bio_index < bio->bi_vcnt) {
if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
if (offset >= ordered->file_offset + ordered->len ||
offset < ordered->file_offset) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered);
bytes_left = bio->bi_iter.bi_size - total_bytes;
sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
GFP_NOFS);
BUG_ON(!sums); /* -ENOMEM */
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
total_bytes;
index = 0;
}
data = kmap_atomic(bvec->bv_page);
sums->sums[index] = ~(u32)0;
sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
sums->sums[index],
bvec->bv_len);
kunmap_atomic(data);
btrfs_csum_final(sums->sums[index],
(char *)(sums->sums + index));
bio_index++;
index++;
total_bytes += bvec->bv_len;
this_sum_bytes += bvec->bv_len;
offset += bvec->bv_len;
bvec++;
}
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
/*
* helper function for csum removal, this expects the
* key to describe the csum pointed to by the path, and it expects
* the csum to overlap the range [bytenr, len]
*
* The csum should not be entirely contained in the range and the
* range should not be entirely contained in the csum.
*
* This calls btrfs_truncate_item with the correct args based on the
* overlap, and fixes up the key as required.
*/
static noinline void truncate_one_csum(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *key,
u64 bytenr, u64 len)
{
struct extent_buffer *leaf;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
u64 csum_end;
u64 end_byte = bytenr + len;
u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
leaf = path->nodes[0];
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
csum_end <<= root->fs_info->sb->s_blocksize_bits;
csum_end += key->offset;
if (key->offset < bytenr && csum_end <= end_byte) {
/*
* [ bytenr - len ]
* [ ]
* [csum ]
* A simple truncate off the end of the item
*/
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
new_size *= csum_size;
btrfs_truncate_item(root, path, new_size, 1);
} else if (key->offset >= bytenr && csum_end > end_byte &&
end_byte > key->offset) {
/*
* [ bytenr - len ]
* [ ]
* [csum ]
* we need to truncate from the beginning of the csum
*/
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
new_size *= csum_size;
btrfs_truncate_item(root, path, new_size, 0);
key->offset = end_byte;
btrfs_set_item_key_safe(root, path, key);
} else {
BUG();
}
}
/*
* deletes the csum items from the csum tree for a given
* range of bytes.
*/
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len)
{
struct btrfs_path *path;
struct btrfs_key key;
u64 end_byte = bytenr + len;
u64 csum_end;
struct extent_buffer *leaf;
int ret;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
root = root->fs_info->csum_root;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
while (1) {
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.offset = end_byte - 1;
key.type = BTRFS_EXTENT_CSUM_KEY;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
if (path->slots[0] == 0)
break;
path->slots[0]--;
} else if (ret < 0) {
break;
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
key.type != BTRFS_EXTENT_CSUM_KEY) {
break;
}
if (key.offset >= end_byte)
break;
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
csum_end <<= blocksize_bits;
csum_end += key.offset;
/* this csum ends before we start, we're done */
if (csum_end <= bytenr)
break;
/* delete the entire item, it is inside our range */
if (key.offset >= bytenr && csum_end <= end_byte) {
ret = btrfs_del_item(trans, root, path);
if (ret)
goto out;
if (key.offset == bytenr)
break;
} else if (key.offset < bytenr && csum_end > end_byte) {
unsigned long offset;
unsigned long shift_len;
unsigned long item_offset;
/*
* [ bytenr - len ]
* [csum ]
*
* Our bytes are in the middle of the csum,
* we need to split this item and insert a new one.
*
* But we can't drop the path because the
* csum could change, get removed, extended etc.
*
* The trick here is the max size of a csum item leaves
* enough room in the tree block for a single
* item header. So, we split the item in place,
* adding a new header pointing to the existing
* bytes. Then we loop around again and we have
* a nicely formed csum item that we can neatly
* truncate.
*/
offset = (bytenr - key.offset) >> blocksize_bits;
offset *= csum_size;
shift_len = (len >> blocksize_bits) * csum_size;
item_offset = btrfs_item_ptr_offset(leaf,
path->slots[0]);
memset_extent_buffer(leaf, 0, item_offset + offset,
shift_len);
key.offset = bytenr;
/*
* btrfs_split_item returns -EAGAIN when the
* item changed size or key
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
key.offset = end_byte - 1;
} else {
truncate_one_csum(root, path, &key, bytenr, len);
if (key.offset < bytenr)
break;
}
btrfs_release_path(path);
}
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums)
{
struct btrfs_key file_key;
struct btrfs_key found_key;
struct btrfs_path *path;
struct btrfs_csum_item *item;
struct btrfs_csum_item *item_end;
struct extent_buffer *leaf = NULL;
u64 next_offset;
u64 total_bytes = 0;
u64 csum_offset;
u64 bytenr;
u32 nritems;
u32 ins_size;
int index = 0;
int found_next;
int ret;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
again:
next_offset = (u64)-1;
found_next = 0;
bytenr = sums->bytenr + total_bytes;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = bytenr;
file_key.type = BTRFS_EXTENT_CSUM_KEY;
item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
if (!IS_ERR(item)) {
ret = 0;
leaf = path->nodes[0];
item_end = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
goto found;
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
goto fail_unlock;
if (ret == -EFBIG) {
u32 item_size;
/* we found one, but it isn't big enough yet */
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if ((item_size / csum_size) >=
MAX_CSUM_ITEMS(root, csum_size)) {
/* already at max size, make a new one */
goto insert;
}
} else {
int slot = path->slots[0] + 1;
/* we didn't find a csum item, insert one */
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
if (ret == 1)
found_next = 1;
if (ret != 0)
goto insert;
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
found_key.type != BTRFS_EXTENT_CSUM_KEY) {
found_next = 1;
goto insert;
}
next_offset = found_key.offset;
found_next = 1;
goto insert;
}
/*
* at this point, we know the tree has an item, but it isn't big
* enough yet to put our csum in. Grow it
*/
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
if (ret < 0)
goto fail_unlock;
if (ret > 0) {
if (path->slots[0] == 0)
goto insert;
path->slots[0]--;
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
csum_offset = (bytenr - found_key.offset) >>
root->fs_info->sb->s_blocksize_bits;
if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
goto insert;
}
if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
csum_size) {
int extend_nr;
u64 tmp;
u32 diff;
u32 free_space;
if (btrfs_leaf_free_space(root, leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
free_space = btrfs_leaf_free_space(root, leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= root->fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
extend_nr = max_t(int, 1, (int)tmp);
diff = (csum_offset + extend_nr) * csum_size;
diff = min(diff, MAX_CSUM_ITEMS(root, csum_size) * csum_size);
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
diff = min(free_space, diff);
diff /= csum_size;
diff *= csum_size;
btrfs_extend_item(root, path, diff);
ret = 0;
goto csum;
}
insert:
btrfs_release_path(path);
csum_offset = 0;
if (found_next) {
u64 tmp;
tmp = sums->len - total_bytes;
tmp >>= root->fs_info->sb->s_blocksize_bits;
tmp = min(tmp, (next_offset - file_key.offset) >>
root->fs_info->sb->s_blocksize_bits);
tmp = max((u64)1, tmp);
tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
ins_size = csum_size * tmp;
} else {
ins_size = csum_size;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &file_key,
ins_size);
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
if (WARN_ON(ret != 0))
goto fail_unlock;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item +
btrfs_item_size_nr(leaf, path->slots[0]));
item = (struct btrfs_csum_item *)((unsigned char *)item +
csum_offset * csum_size);
found:
ins_size = (u32)(sums->len - total_bytes) >>
root->fs_info->sb->s_blocksize_bits;
ins_size *= csum_size;
ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
ins_size);
write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
ins_size);
ins_size /= csum_size;
total_bytes += ins_size * root->sectorsize;
index += ins_size;
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
btrfs_release_path(path);
cond_resched();
goto again;
}
out:
btrfs_free_path(path);
return ret;
fail_unlock:
goto out;
}
void btrfs_extent_item_to_extent_map(struct inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const bool new_inline,
struct extent_map *em)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_key key;
u64 extent_start, extent_end;
u64 bytenr;
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
em->bdev = root->fs_info->fs_devices->latest_bdev;
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, fi);
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_inline_len(leaf, slot, fi);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
em->len = extent_end - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
return;
}
if (compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
em->block_start = bytenr;
em->block_len = em->orig_block_len;
} else {
bytenr += btrfs_file_extent_offset(leaf, fi);
em->block_start = bytenr;
em->block_len = em->len;
if (type == BTRFS_FILE_EXTENT_PREALLOC)
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
em->block_start = EXTENT_MAP_INLINE;
em->start = extent_start;
em->len = extent_end - extent_start;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
*/
em->orig_start = EXTENT_MAP_HOLE;
em->block_len = (u64)-1;
if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
}
} else {
btrfs_err(root->fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, root %llu",
type, btrfs_ino(inode), extent_start,
root->root_key.objectid);
}
}

2812
fs/btrfs/file.c Normal file

File diff suppressed because it is too large Load diff

3414
fs/btrfs/free-space-cache.c Normal file

File diff suppressed because it is too large Load diff

124
fs/btrfs/free-space-cache.h Normal file
View file

@ -0,0 +1,124 @@
/*
* Copyright (C) 2009 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_FREE_SPACE_CACHE
#define __BTRFS_FREE_SPACE_CACHE
struct btrfs_free_space {
struct rb_node offset_index;
u64 offset;
u64 bytes;
unsigned long *bitmap;
struct list_head list;
};
struct btrfs_free_space_ctl {
spinlock_t tree_lock;
struct rb_root free_space_offset;
u64 free_space;
int extents_thresh;
int free_extents;
int total_bitmaps;
int unit;
u64 start;
struct btrfs_free_space_op *op;
void *private;
};
struct btrfs_free_space_op {
void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
};
struct inode *lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
int create_free_space_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path);
int create_free_ino_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path);
int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct inode *inode);
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size);
static inline int
btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size)
{
return __btrfs_add_free_space(block_group->free_space_ctl,
bytenr, size);
}
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
int btrfs_find_space_cluster(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
/* Support functions for runnint our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
u64 offset, u64 bytes, bool bitmap);
int test_check_exists(struct btrfs_block_group_cache *cache,
u64 offset, u64 bytes);
#endif
#endif

46
fs/btrfs/hash.c Normal file
View file

@ -0,0 +1,46 @@
/*
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <crypto/hash.h>
#include <linux/err.h>
#include "hash.h"
static struct crypto_shash *tfm;
int __init btrfs_hash_init(void)
{
tfm = crypto_alloc_shash("crc32c", 0, 0);
return PTR_ERR_OR_ZERO(tfm);
}
void btrfs_hash_exit(void)
{
crypto_free_shash(tfm);
}
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
int err;
shash->tfm = tfm;
shash->flags = 0;
*ctx = crc;
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
return *ctx;
}

42
fs/btrfs/hash.h Normal file
View file

@ -0,0 +1,42 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __HASH__
#define __HASH__
int __init btrfs_hash_init(void);
void btrfs_hash_exit(void);
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
static inline u64 btrfs_name_hash(const char *name, int len)
{
return btrfs_crc32c((u32)~1, name, len);
}
/*
* Figure the key offset of an extended inode ref
*/
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
int len)
{
return (u64) btrfs_crc32c(parent_objectid, name, len);
}
#endif

435
fs/btrfs/inode-item.c Normal file
View file

@ -0,0 +1,435 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
#include "print-tree.h"
static int find_name_in_backref(struct btrfs_path *path, const char *name,
int name_len, struct btrfs_inode_ref **ref_ret)
{
struct extent_buffer *leaf;
struct btrfs_inode_ref *ref;
unsigned long ptr;
unsigned long name_ptr;
u32 item_size;
u32 cur_offset = 0;
int len;
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
while (cur_offset < item_size) {
ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
len = btrfs_inode_ref_name_len(leaf, ref);
name_ptr = (unsigned long)(ref + 1);
cur_offset += len + sizeof(*ref);
if (len != name_len)
continue;
if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
*ref_ret = ref;
return 1;
}
}
return 0;
}
int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
const char *name, int name_len,
struct btrfs_inode_extref **extref_ret)
{
struct extent_buffer *leaf;
struct btrfs_inode_extref *extref;
unsigned long ptr;
unsigned long name_ptr;
u32 item_size;
u32 cur_offset = 0;
int ref_name_len;
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
/*
* Search all extended backrefs in this item. We're only
* looking through any collisions so most of the time this is
* just going to compare against one buffer. If all is well,
* we'll return success and the inode ref object.
*/
while (cur_offset < item_size) {
extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
name_ptr = (unsigned long)(&extref->name);
ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
if (ref_name_len == name_len &&
btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
(memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) {
if (extref_ret)
*extref_ret = extref;
return 1;
}
cur_offset += ref_name_len + sizeof(*extref);
}
return 0;
}
/* Returns NULL if no extref found */
struct btrfs_inode_extref *
btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow)
{
int ret;
struct btrfs_key key;
struct btrfs_inode_extref *extref;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref))
return NULL;
return extref;
}
static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid,
u64 *index)
{
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_inode_extref *extref;
struct extent_buffer *leaf;
int ret;
int del_len = name_len + sizeof(*extref);
unsigned long ptr;
unsigned long item_start;
u32 item_size;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
/*
* Sanity check - did we find the right item for this name?
* This should always succeed so error here will make the FS
* readonly.
*/
if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
name, name_len, &extref)) {
btrfs_std_error(root->fs_info, -ENOENT);
ret = -EROFS;
goto out;
}
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (index)
*index = btrfs_inode_extref_index(leaf, extref);
if (del_len == item_size) {
/*
* Common case only one ref in the item, remove the
* whole item.
*/
ret = btrfs_del_item(trans, root, path);
goto out;
}
ptr = (unsigned long)extref;
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + del_len,
item_size - (ptr + del_len - item_start));
btrfs_truncate_item(root, path, item_size - del_len, 1);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index)
{
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_inode_ref *ref;
struct extent_buffer *leaf;
unsigned long ptr;
unsigned long item_start;
u32 item_size;
u32 sub_item_len;
int ret;
int search_ext_refs = 0;
int del_len = name_len + sizeof(*ref);
key.objectid = inode_objectid;
key.offset = ref_objectid;
key.type = BTRFS_INODE_REF_KEY;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
ret = -ENOENT;
search_ext_refs = 1;
goto out;
} else if (ret < 0) {
goto out;
}
if (!find_name_in_backref(path, name, name_len, &ref)) {
ret = -ENOENT;
search_ext_refs = 1;
goto out;
}
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (index)
*index = btrfs_inode_ref_index(leaf, ref);
if (del_len == item_size) {
ret = btrfs_del_item(trans, root, path);
goto out;
}
ptr = (unsigned long)ref;
sub_item_len = name_len + sizeof(*ref);
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
btrfs_truncate_item(root, path, item_size - sub_item_len, 1);
out:
btrfs_free_path(path);
if (search_ext_refs) {
/*
* No refs were found, or we could not find the
* name in our ref array. Find and remove the extended
* inode ref then.
*/
return btrfs_del_inode_extref(trans, root, name, name_len,
inode_objectid, ref_objectid, index);
}
return ret;
}
/*
* btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree.
*
* The caller must have checked against BTRFS_LINK_MAX already.
*/
static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 index)
{
struct btrfs_inode_extref *extref;
int ret;
int ins_len = name_len + sizeof(*extref);
unsigned long ptr;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_item *item;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
if (btrfs_find_name_in_ext_backref(path, ref_objectid,
name, name_len, NULL))
goto out;
btrfs_extend_item(root, path, ins_len);
ret = 0;
}
if (ret < 0)
goto out;
leaf = path->nodes[0];
item = btrfs_item_nr(path->slots[0]);
ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
ptr += btrfs_item_size(leaf, item) - ins_len;
extref = (struct btrfs_inode_extref *)ptr;
btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
btrfs_set_inode_extref_index(path->nodes[0], extref, index);
btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
ptr = (unsigned long)&extref->name;
write_extent_buffer(path->nodes[0], name, ptr, name_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_free_path(path);
return ret;
}
/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 index)
{
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_inode_ref *ref;
unsigned long ptr;
int ret;
int ins_len = name_len + sizeof(*ref);
key.objectid = inode_objectid;
key.offset = ref_objectid;
key.type = BTRFS_INODE_REF_KEY;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
u32 old_size;
if (find_name_in_backref(path, name, name_len, &ref))
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
btrfs_extend_item(root, path, ins_len);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
ptr = (unsigned long)(ref + 1);
ret = 0;
} else if (ret < 0) {
if (ret == -EOVERFLOW)
ret = -EMLINK;
goto out;
} else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
ptr = (unsigned long)(ref + 1);
}
write_extent_buffer(path->nodes[0], name, ptr, name_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_free_path(path);
if (ret == -EMLINK) {
struct btrfs_super_block *disk_super = root->fs_info->super_copy;
/* We ran out of space in the ref array. Need to
* add an extended ref. */
if (btrfs_super_incompat_flags(disk_super)
& BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
ret = btrfs_insert_inode_extref(trans, root, name,
name_len,
inode_objectid,
ref_objectid, index);
}
return ret;
}
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid)
{
struct btrfs_key key;
int ret;
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(struct btrfs_inode_item));
return ret;
}
int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path,
struct btrfs_key *location, int mod)
{
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
int ret;
int slot;
struct extent_buffer *leaf;
struct btrfs_key found_key;
ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY &&
location->offset == (u64)-1 && path->slots[0] != 0) {
slot = path->slots[0] - 1;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid == location->objectid &&
found_key.type == location->type) {
path->slots[0]--;
return 0;
}
}
return ret;
}

566
fs/btrfs/inode-map.c Normal file
View file

@ -0,0 +1,566 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"
static int caching_kthread(void *data)
{
struct btrfs_root *root = data;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
u64 last = (u64)-1;
int slot;
int ret;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
/* Since the commit root is read-only, we can safely skip locking. */
path->skip_locking = 1;
path->search_commit_root = 1;
path->reada = 2;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_INODE_ITEM_KEY;
again:
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->commit_root_sem);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
while (1) {
if (btrfs_fs_closing(fs_info))
goto out;
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
break;
if (need_resched() ||
btrfs_transaction_in_commit(fs_info)) {
leaf = path->nodes[0];
if (WARN_ON(btrfs_header_nritems(leaf) == 0))
break;
/*
* Save the key so we can advances forward
* in the next search.
*/
btrfs_item_key_to_cpu(leaf, &key, 0);
btrfs_release_path(path);
root->ino_cache_progress = last;
up_read(&fs_info->commit_root_sem);
schedule_timeout(1);
goto again;
} else
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.type != BTRFS_INODE_ITEM_KEY)
goto next;
if (key.objectid >= root->highest_objectid)
break;
if (last != (u64)-1 && last + 1 != key.objectid) {
__btrfs_add_free_space(ctl, last + 1,
key.objectid - last - 1);
wake_up(&root->ino_cache_wait);
}
last = key.objectid;
next:
path->slots[0]++;
}
if (last < root->highest_objectid - 1) {
__btrfs_add_free_space(ctl, last + 1,
root->highest_objectid - last - 1);
}
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_FINISHED;
spin_unlock(&root->ino_cache_lock);
root->ino_cache_progress = (u64)-1;
btrfs_unpin_free_ino(root);
out:
wake_up(&root->ino_cache_wait);
up_read(&fs_info->commit_root_sem);
btrfs_free_path(path);
return ret;
}
static void start_caching(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct task_struct *tsk;
int ret;
u64 objectid;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return;
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state != BTRFS_CACHE_NO) {
spin_unlock(&root->ino_cache_lock);
return;
}
root->ino_cache_state = BTRFS_CACHE_STARTED;
spin_unlock(&root->ino_cache_lock);
ret = load_free_ino_cache(root->fs_info, root);
if (ret == 1) {
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_FINISHED;
spin_unlock(&root->ino_cache_lock);
return;
}
/*
* It can be quite time-consuming to fill the cache by searching
* through the extent tree, and this can keep ino allocation path
* waiting. Therefore at start we quickly find out the highest
* inode number and we know we can use inode numbers which fall in
* [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
*/
ret = btrfs_find_free_objectid(root, &objectid);
if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
__btrfs_add_free_space(ctl, objectid,
BTRFS_LAST_FREE_OBJECTID - objectid + 1);
}
tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
root->root_key.objectid);
if (IS_ERR(tsk)) {
btrfs_warn(root->fs_info, "failed to start inode caching task");
btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
"disabling inode map caching");
}
}
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return btrfs_find_free_objectid(root, objectid);
again:
*objectid = btrfs_find_ino_for_alloc(root);
if (*objectid != 0)
return 0;
start_caching(root);
wait_event(root->ino_cache_wait,
root->ino_cache_state == BTRFS_CACHE_FINISHED ||
root->free_ino_ctl->free_space > 0);
if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
root->free_ino_ctl->free_space == 0)
return -ENOSPC;
else
goto again;
}
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return;
again:
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
__btrfs_add_free_space(pinned, objectid, 1);
} else {
down_write(&root->fs_info->commit_root_sem);
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
spin_unlock(&root->ino_cache_lock);
up_write(&root->fs_info->commit_root_sem);
goto again;
}
spin_unlock(&root->ino_cache_lock);
start_caching(root);
__btrfs_add_free_space(pinned, objectid, 1);
up_write(&root->fs_info->commit_root_sem);
}
}
/*
* When a transaction is committed, we'll move those inode numbers which are
* smaller than root->ino_cache_progress from pinned tree to free_ino tree, and
* others will just be dropped, because the commit root we were searching has
* changed.
*
* Must be called with root->fs_info->commit_root_sem held
*/
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
struct btrfs_free_space *info;
struct rb_node *n;
u64 count;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return;
while (1) {
n = rb_first(rbroot);
if (!n)
break;
info = rb_entry(n, struct btrfs_free_space, offset_index);
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
goto free;
else if (info->offset + info->bytes > root->ino_cache_progress)
count = root->ino_cache_progress - info->offset + 1;
else
count = info->bytes;
__btrfs_add_free_space(ctl, info->offset, count);
free:
rb_erase(&info->offset_index, rbroot);
kfree(info);
}
}
#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
/*
* The goal is to keep the memory used by the free_ino tree won't
* exceed the memory if we use bitmaps only.
*/
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *info;
struct rb_node *n;
int max_ino;
int max_bitmaps;
n = rb_last(&ctl->free_space_offset);
if (!n) {
ctl->extents_thresh = INIT_THRESHOLD;
return;
}
info = rb_entry(n, struct btrfs_free_space, offset_index);
/*
* Find the maximum inode number in the filesystem. Note we
* ignore the fact that this can be a bitmap, because we are
* not doing precise calculation.
*/
max_ino = info->bytes - 1;
max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
if (max_bitmaps <= ctl->total_bitmaps) {
ctl->extents_thresh = 0;
return;
}
ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
PAGE_CACHE_SIZE / sizeof(*info);
}
/*
* We don't fall back to bitmap, if we are below the extents threshold
* or this chunk of inode numbers is a big one.
*/
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
if (ctl->free_extents < ctl->extents_thresh ||
info->bytes > INODES_PER_BITMAP / 10)
return false;
return true;
}
static struct btrfs_free_space_op free_ino_op = {
.recalc_thresholds = recalculate_thresholds,
.use_bitmap = use_bitmap,
};
static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}
static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
/*
* We always use extents for two reasons:
*
* - The pinned tree is only used during the process of caching
* work.
* - Make code simpler. See btrfs_unpin_free_ino().
*/
return false;
}
static struct btrfs_free_space_op pinned_free_ino_op = {
.recalc_thresholds = pinned_recalc_thresholds,
.use_bitmap = pinned_use_bitmap,
};
void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
spin_lock_init(&ctl->tree_lock);
ctl->unit = 1;
ctl->start = 0;
ctl->private = NULL;
ctl->op = &free_ino_op;
/*
* Initially we allow to use 16K of ram to cache chunks of
* inode numbers before we resort to bitmaps. This is somewhat
* arbitrary, but it will be adjusted in runtime.
*/
ctl->extents_thresh = INIT_THRESHOLD;
spin_lock_init(&pinned->tree_lock);
pinned->unit = 1;
pinned->start = 0;
pinned->private = NULL;
pinned->extents_thresh = 0;
pinned->op = &pinned_free_ino_op;
}
int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_path *path;
struct inode *inode;
struct btrfs_block_rsv *rsv;
u64 num_bytes;
u64 alloc_hint = 0;
int ret;
int prealloc;
bool retry = false;
/* only fs tree and subvol/snap needs ino cache */
if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
(root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
return 0;
/* Don't save inode cache if we are deleting this root */
if (btrfs_root_refs(&root->root_item) == 0)
return 0;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->trans_block_rsv;
num_bytes = trans->bytes_reserved;
/*
* 1 item for inode item insertion if need
* 4 items for inode item update (in the worst case)
* 1 items for slack space if we need do truncation
* 1 item for free space object
* 3 items for pre-allocation
*/
trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
trans->transid, trans->bytes_reserved, 1);
again:
inode = lookup_free_ino_inode(root, path);
if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
ret = PTR_ERR(inode);
goto out_release;
}
if (IS_ERR(inode)) {
BUG_ON(retry); /* Logic error */
retry = true;
ret = create_free_ino_inode(root, trans, path);
if (ret)
goto out_release;
goto again;
}
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_put;
}
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
goto out_put;
}
}
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
ret = -1;
spin_unlock(&root->ino_cache_lock);
goto out_put;
}
spin_unlock(&root->ino_cache_lock);
spin_lock(&ctl->tree_lock);
prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
spin_unlock(&ctl->tree_lock);
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_CACHE_SIZE;
ret = btrfs_delalloc_reserve_space(inode, prealloc);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
btrfs_delalloc_release_space(inode, prealloc);
goto out_put;
}
btrfs_free_reserved_data_space(inode, prealloc);
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
iput(inode);
out_release:
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
trans->block_rsv = rsv;
trans->bytes_reserved = num_bytes;
btrfs_free_path(path);
return ret;
}
static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
struct btrfs_path *path;
int ret;
struct extent_buffer *l;
struct btrfs_key search_key;
struct btrfs_key found_key;
int slot;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
search_key.type = -1;
search_key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto error;
BUG_ON(ret == 0); /* Corruption */
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
*objectid = max_t(u64, found_key.objectid,
BTRFS_FIRST_FREE_OBJECTID - 1);
} else {
*objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
}
ret = 0;
error:
btrfs_free_path(path);
return ret;
}
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
int ret;
mutex_lock(&root->objectid_mutex);
if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_find_highest_objectid(root,
&root->highest_objectid);
if (ret)
goto out;
}
if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
ret = -ENOSPC;
goto out;
}
*objectid = ++root->highest_objectid;
ret = 0;
out:
mutex_unlock(&root->objectid_mutex);
return ret;
}

13
fs/btrfs/inode-map.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef __BTRFS_INODE_MAP
#define __BTRFS_INODE_MAP
void btrfs_init_free_ino_ctl(struct btrfs_root *root);
void btrfs_unpin_free_ino(struct btrfs_root *root);
void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
#endif

9572
fs/btrfs/inode.c Normal file

File diff suppressed because it is too large Load diff

5374
fs/btrfs/ioctl.c Normal file

File diff suppressed because it is too large Load diff

300
fs/btrfs/locking.c Normal file
View file

@ -0,0 +1,300 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
/*
* if we currently have a spinning reader or writer lock
* (indicated by the rw flag) this will bump the count
* of blocking holders and drop the spinlock.
*/
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
/*
* no lock is required. The lock owner may change if
* we have a read lock, but it won't change to or away
* from us. If we have the write lock, we are the owner
* and it'll never change.
*/
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
if (rw == BTRFS_WRITE_LOCK) {
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
btrfs_assert_tree_locked(eb);
atomic_inc(&eb->blocking_writers);
write_unlock(&eb->lock);
}
} else if (rw == BTRFS_READ_LOCK) {
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
read_unlock(&eb->lock);
}
return;
}
/*
* if we currently have a blocking lock, take the spinlock
* and drop our blocking count
*/
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
/*
* no lock is required. The lock owner may change if
* we have a read lock, but it won't change to or away
* from us. If we have the write lock, we are the owner
* and it'll never change.
*/
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
if (atomic_dec_and_test(&eb->blocking_writers) &&
waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
}
return;
}
/*
* take a spinning read lock. This will wait for any blocking
* writers
*/
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
again:
BUG_ON(!atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner) {
/*
* This extent is already write-locked by our thread. We allow
* an additional read lock to be added because it's for the same
* thread. btrfs_find_all_roots() depends on this as it may be
* called on a partly (write-)locked tree.
*/
BUG_ON(eb->lock_nested);
eb->lock_nested = 1;
read_unlock(&eb->lock);
return;
}
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
}
/*
* take a spinning read lock.
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
*/
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
if (atomic_read(&eb->blocking_writers))
return 0;
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
return 1;
}
/*
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
*/
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
if (atomic_read(&eb->blocking_writers))
return 0;
if (!read_trylock(&eb->lock))
return 0;
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
return 1;
}
/*
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers or readers
*/
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
eb->lock_owner = current->pid;
return 1;
}
/*
* drop a spinning read lock
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
* The write unlock will do a barrier for us, and the lock_nested
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
return;
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
atomic_dec(&eb->read_locks);
read_unlock(&eb->lock);
}
/*
* drop a blocking read lock
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
* The write unlock will do a barrier for us, and the lock_nested
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
return;
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
/*
* take a spinning write lock. This will wait for both
* blocking readers or writers
*/
void btrfs_tree_lock(struct extent_buffer *eb)
{
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
wait_event(eb->read_lock_wq,
atomic_read(&eb->blocking_readers) == 0);
goto again;
}
if (atomic_read(&eb->blocking_writers)) {
write_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
eb->lock_owner = current->pid;
}
/*
* drop a spinning or a blocking write lock.
*/
void btrfs_tree_unlock(struct extent_buffer *eb)
{
int blockers = atomic_read(&eb->blocking_writers);
BUG_ON(blockers > 1);
btrfs_assert_tree_locked(eb);
eb->lock_owner = 0;
atomic_dec(&eb->write_locks);
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
smp_mb();
if (waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
write_unlock(&eb->lock);
}
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->write_locks));
}
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}

62
fs/btrfs/locking.h Normal file
View file

@ -0,0 +1,62 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_tree_read_unlock(struct extent_buffer *eb);
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
btrfs_tree_unlock(eb);
else if (rw == BTRFS_READ_LOCK_BLOCKING)
btrfs_tree_read_unlock_blocking(eb);
else if (rw == BTRFS_READ_LOCK)
btrfs_tree_read_unlock(eb);
else
BUG();
}
static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
}
static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
}
#endif

443
fs/btrfs/lzo.c Normal file
View file

@ -0,0 +1,443 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/lzo.h>
#include "compression.h"
#define LZO_LEN 4
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
void *cbuf; /* where compressed data goes */
struct list_head list;
};
static void lzo_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
vfree(workspace->buf);
vfree(workspace->cbuf);
vfree(workspace->mem);
kfree(workspace);
}
static struct list_head *lzo_alloc_workspace(void)
{
struct workspace *workspace;
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
if (!workspace)
return ERR_PTR(-ENOMEM);
workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
INIT_LIST_HEAD(&workspace->list);
return &workspace->list;
fail:
lzo_free_workspace(&workspace->list);
return ERR_PTR(-ENOMEM);
}
static inline void write_compress_length(char *buf, size_t len)
{
__le32 dlen;
dlen = cpu_to_le32(len);
memcpy(buf, &dlen, LZO_LEN);
}
static inline size_t read_compress_length(char *buf)
{
__le32 dlen;
memcpy(&dlen, buf, LZO_LEN);
return le32_to_cpu(dlen);
}
static int lzo_compress_pages(struct list_head *ws,
struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
char *data_in;
char *cpage_out;
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
unsigned long bytes_left;
size_t in_len;
size_t out_len;
char *buf;
unsigned long tot_in = 0;
unsigned long tot_out = 0;
unsigned long pg_bytes_left;
unsigned long out_offset;
unsigned long bytes;
*out_pages = 0;
*total_out = 0;
*total_in = 0;
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
/*
* store the size of all chunks of compressed data in
* the first 4 bytes
*/
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
out_offset = LZO_LEN;
tot_out = LZO_LEN;
pages[0] = out_page;
nr_pages = 1;
pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
/* compress at most one page of data each time */
in_len = min(len, PAGE_CACHE_SIZE);
while (tot_in < len) {
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
&out_len, workspace->mem);
if (ret != LZO_E_OK) {
printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
ret);
ret = -EIO;
goto out;
}
/* store the size of this chunk of compressed data */
write_compress_length(cpage_out + out_offset, out_len);
tot_out += LZO_LEN;
out_offset += LZO_LEN;
pg_bytes_left -= LZO_LEN;
tot_in += in_len;
tot_out += out_len;
/* copy bytes from the working buffer into the pages */
buf = workspace->cbuf;
while (out_len) {
bytes = min_t(unsigned long, pg_bytes_left, out_len);
memcpy(cpage_out + out_offset, buf, bytes);
out_len -= bytes;
pg_bytes_left -= bytes;
buf += bytes;
out_offset += bytes;
/*
* we need another page for writing out.
*
* Note if there's less than 4 bytes left, we just
* skip to a new page.
*/
if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
pg_bytes_left == 0) {
if (pg_bytes_left) {
memset(cpage_out + out_offset, 0,
pg_bytes_left);
tot_out += pg_bytes_left;
}
/* we're done, don't allocate new page */
if (out_len == 0 && tot_in >= len)
break;
kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
pages[nr_pages++] = out_page;
pg_bytes_left = PAGE_CACHE_SIZE;
out_offset = 0;
}
}
/* we're making it bigger, give up */
if (tot_in > 8192 && tot_in < tot_out) {
ret = -E2BIG;
goto out;
}
/* we're all done */
if (tot_in >= len)
break;
if (tot_out > max_out)
break;
bytes_left = len - tot_in;
kunmap(in_page);
page_cache_release(in_page);
start += PAGE_CACHE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
in_len = min(bytes_left, PAGE_CACHE_SIZE);
}
if (tot_out > tot_in)
goto out;
/* store the size of all chunks of compressed data */
cpage_out = kmap(pages[0]);
write_compress_length(cpage_out, tot_out);
kunmap(pages[0]);
ret = 0;
*total_out = tot_out;
*total_in = tot_in;
out:
*out_pages = nr_pages;
if (out_page)
kunmap(out_page);
if (in_page) {
kunmap(in_page);
page_cache_release(in_page);
}
return ret;
}
static int lzo_decompress_biovec(struct list_head *ws,
struct page **pages_in,
u64 disk_start,
struct bio_vec *bvec,
int vcnt,
size_t srclen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
char *data_in;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
unsigned long buf_start;
unsigned long buf_offset = 0;
unsigned long bytes;
unsigned long working_bytes;
unsigned long pg_offset;
size_t in_len;
size_t out_len;
unsigned long in_offset;
unsigned long in_page_bytes_left;
unsigned long tot_in;
unsigned long tot_out;
unsigned long tot_len;
char *buf;
bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
tot_in = LZO_LEN;
in_offset = LZO_LEN;
tot_len = min_t(size_t, srclen, tot_len);
in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
tot_out = 0;
pg_offset = 0;
while (tot_in < tot_len) {
in_len = read_compress_length(data_in + in_offset);
in_page_bytes_left -= LZO_LEN;
in_offset += LZO_LEN;
tot_in += LZO_LEN;
tot_in += in_len;
working_bytes = in_len;
may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
may_late_unmap = true;
goto cont;
}
/* copy bytes from the pages into the working buffer */
buf = workspace->cbuf;
buf_offset = 0;
while (working_bytes) {
bytes = min(working_bytes, in_page_bytes_left);
memcpy(buf + buf_offset, data_in + in_offset, bytes);
buf_offset += bytes;
cont:
working_bytes -= bytes;
in_page_bytes_left -= bytes;
in_offset += bytes;
/* check if we need to pick another page */
if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
|| in_page_bytes_left == 0) {
tot_in += in_page_bytes_left;
if (working_bytes == 0 && tot_in >= tot_len)
break;
if (page_in_index + 1 >= total_pages_in) {
ret = -EIO;
goto done;
}
if (may_late_unmap)
need_unmap = true;
else
kunmap(pages_in[page_in_index]);
data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
}
}
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
if (need_unmap)
kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "BTRFS: decompress failed\n");
ret = -EIO;
break;
}
buf_start = tot_out;
tot_out += out_len;
ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
tot_out, disk_start,
bvec, vcnt,
&page_out_index, &pg_offset);
if (ret2 == 0)
break;
}
done:
kunmap(pages_in[page_in_index]);
if (!ret)
btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
size_t tot_len;
int ret = 0;
char *kaddr;
unsigned long bytes;
BUG_ON(srclen < LZO_LEN);
tot_len = read_compress_length(data_in);
data_in += LZO_LEN;
in_len = read_compress_length(data_in);
data_in += LZO_LEN;
out_len = PAGE_CACHE_SIZE;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "BTRFS: decompress failed!\n");
ret = -EIO;
goto out;
}
if (out_len < start_byte) {
ret = -EIO;
goto out;
}
/*
* the caller is already checking against PAGE_SIZE, but lets
* move this check closer to the memcpy/memset
*/
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
* btrfs_getblock is doing a zero on the tail of the page too,
* but this will cover anything missing from the decompressed
* data.
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
kunmap_atomic(kaddr);
out:
return ret;
}
struct btrfs_compress_op btrfs_lzo_compress = {
.alloc_workspace = lzo_alloc_workspace,
.free_workspace = lzo_free_workspace,
.compress_pages = lzo_compress_pages,
.decompress_biovec = lzo_decompress_biovec,
.decompress = lzo_decompress,
};

44
fs/btrfs/math.h Normal file
View file

@ -0,0 +1,44 @@
/*
* Copyright (C) 2012 Fujitsu. All rights reserved.
* Written by Miao Xie <miaox@cn.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_MATH_H
#define __BTRFS_MATH_H
#include <asm/div64.h>
static inline u64 div_factor(u64 num, int factor)
{
if (factor == 10)
return num;
num *= factor;
do_div(num, 10);
return num;
}
static inline u64 div_factor_fine(u64 num, int factor)
{
if (factor == 100)
return num;
num *= factor;
do_div(num, 100);
return num;
}
#endif

1055
fs/btrfs/ordered-data.c Normal file

File diff suppressed because it is too large Load diff

210
fs/btrfs/ordered-data.h Normal file
View file

@ -0,0 +1,210 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_ORDERED_DATA__
#define __BTRFS_ORDERED_DATA__
/* one of these per inode */
struct btrfs_ordered_inode_tree {
spinlock_t lock;
struct rb_root tree;
struct rb_node *last;
};
struct btrfs_ordered_sum {
/* bytenr is the start of this extent on disk */
u64 bytenr;
/*
* this is the length in bytes covered by the sums array below.
*/
int len;
struct list_head list;
/* last field is a variable length array of csums */
u32 sums[];
};
/*
* bits for the flags field:
*
* BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
* It is used to make sure metadata is inserted into the tree only once
* per extent.
*
* BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
* rbtree, just before waking any waiters. It is used to indicate the
* IO is done and any metadata is inserted into the tree.
*/
#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
* has done its due diligence in updating
* the isize. */
#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
ordered extent */
#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
* in the logging code. */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
/* disk byte number */
u64 start;
/* ram length of the extent in bytes */
u64 len;
/* extent length on disk */
u64 disk_len;
/* number of bytes that still need writing */
u64 bytes_left;
/* number of bytes that still need csumming */
u64 csum_bytes_left;
/*
* the end of the ordered extent which is behind it but
* didn't update disk_i_size. Please see the comment of
* btrfs_ordered_update_i_size();
*/
u64 outstanding_isize;
/*
* If we get truncated we need to adjust the file extent we enter for
* this ordered extent so that we do not expose stale data.
*/
u64 truncated_len;
/* flags (described above) */
unsigned long flags;
/* compression algorithm */
int compress_type;
/* reference count */
atomic_t refs;
/* the inode we belong to */
struct inode *inode;
/* list of checksums for insertion when the extent io is done */
struct list_head list;
/* If we need to wait on this to be done */
struct list_head log_list;
/* If the transaction needs to wait on this ordered extent */
struct list_head trans_list;
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait;
/* our friendly rbtree entry */
struct rb_node rb_node;
/* a per root list of all the pending ordered extents */
struct list_head root_extent_list;
struct btrfs_work work;
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
};
/*
* calculates the total size you need to allocate for an ordered sum
* structure spanning 'bytes' in the file
*/
static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, root->sectorsize);
return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32);
}
static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
spin_lock_init(&t->lock);
t->tree = RB_ROOT;
t->last = NULL;
}
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int compress_type);
void btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct inode *inode,
struct list_head *logged_list);
void btrfs_put_logged_extents(struct list_head *logged_list);
void btrfs_submit_logged_extents(struct list_head *logged_list,
struct btrfs_root *log);
void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void ordered_data_exit(void);
#endif

71
fs/btrfs/orphan.c Normal file
View file

@ -0,0 +1,71 @@
/*
* Copyright (C) 2008 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#include "disk-io.h"
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
{
struct btrfs_path *path;
struct btrfs_key key;
int ret = 0;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = offset;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
btrfs_free_path(path);
return ret;
}
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
{
struct btrfs_path *path;
struct btrfs_key key;
int ret = 0;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = offset;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret) { /* JDM: Really? */
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
return ret;
}

349
fs/btrfs/print-tree.c Normal file
View file

@ -0,0 +1,349 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
{
int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
int i;
printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
"num_stripes %d\n",
btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk),
btrfs_chunk_type(eb, chunk), num_stripes);
for (i = 0 ; i < num_stripes ; i++) {
printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
btrfs_stripe_devid_nr(eb, chunk, i),
btrfs_stripe_offset_nr(eb, chunk, i));
}
}
static void print_dev_item(struct extent_buffer *eb,
struct btrfs_dev_item *dev_item)
{
printk(KERN_INFO "\t\tdev item devid %llu "
"total_bytes %llu bytes used %llu\n",
btrfs_device_id(eb, dev_item),
btrfs_device_total_bytes(eb, dev_item),
btrfs_device_bytes_used(eb, dev_item));
}
static void print_extent_data_ref(struct extent_buffer *eb,
struct btrfs_extent_data_ref *ref)
{
printk(KERN_INFO "\t\textent data backref root %llu "
"objectid %llu offset %llu count %u\n",
btrfs_extent_data_ref_root(eb, ref),
btrfs_extent_data_ref_objectid(eb, ref),
btrfs_extent_data_ref_offset(eb, ref),
btrfs_extent_data_ref_count(eb, ref));
}
static void print_extent_item(struct extent_buffer *eb, int slot, int type)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
struct btrfs_disk_key key;
unsigned long end;
unsigned long ptr;
u32 item_size = btrfs_item_size_nr(eb, slot);
u64 flags;
u64 offset;
if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
printk(KERN_INFO "\t\textent refs %u\n",
btrfs_extent_refs_v0(eb, ei0));
return;
#else
BUG();
#endif
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
flags);
if ((type == BTRFS_EXTENT_ITEM_KEY) &&
flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
btrfs_tree_block_key(eb, info, &key);
printk(KERN_INFO "\t\ttree block key (%llu %u %llu) "
"level %d\n",
btrfs_disk_key_objectid(&key), key.type,
btrfs_disk_key_offset(&key),
btrfs_tree_block_level(eb, info));
iref = (struct btrfs_extent_inline_ref *)(info + 1);
} else {
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
ptr = (unsigned long)iref;
end = (unsigned long)ei + item_size;
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(eb, iref);
offset = btrfs_extent_inline_ref_offset(eb, iref);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
printk(KERN_INFO "\t\ttree block backref "
"root %llu\n", offset);
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
printk(KERN_INFO "\t\tshared block backref "
"parent %llu\n", offset);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
print_extent_data_ref(eb, dref);
break;
case BTRFS_SHARED_DATA_REF_KEY:
sref = (struct btrfs_shared_data_ref *)(iref + 1);
printk(KERN_INFO "\t\tshared data backref "
"parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
break;
default:
BUG();
}
ptr += btrfs_extent_inline_ref_size(type);
}
WARN_ON(ptr > end);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
{
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
printk("\t\textent back ref root %llu gen %llu "
"owner %llu num_refs %lu\n",
btrfs_ref_root_v0(eb, ref0),
btrfs_ref_generation_v0(eb, ref0),
btrfs_ref_objectid_v0(eb, ref0),
(unsigned long)btrfs_ref_count_v0(eb, ref0));
}
#endif
static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
if (!IS_ALIGNED(item_size, sizeof(u64))) {
pr_warn("BTRFS: uuid item with illegal size %lu!\n",
(unsigned long)item_size);
return;
}
while (item_size) {
__le64 subvol_id;
read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
printk(KERN_INFO "\t\tsubvol_id %llu\n",
(unsigned long long)le64_to_cpu(subvol_id));
item_size -= sizeof(u64);
offset += sizeof(u64);
}
}
void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
{
int i;
u32 type, nr;
struct btrfs_item *item;
struct btrfs_root_item *ri;
struct btrfs_dir_item *di;
struct btrfs_inode_item *ii;
struct btrfs_block_group_item *bi;
struct btrfs_file_extent_item *fi;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
struct btrfs_dev_extent *dev_extent;
struct btrfs_key key;
struct btrfs_key found_key;
if (!l)
return;
nr = btrfs_header_nritems(l);
btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
type = key.type;
printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
"itemsize %d\n",
i, key.objectid, type, key.offset,
btrfs_item_offset(l, item), btrfs_item_size(l, item));
switch (type) {
case BTRFS_INODE_ITEM_KEY:
ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
printk(KERN_INFO "\t\tinode generation %llu size %llu "
"mode %o\n",
btrfs_inode_generation(l, ii),
btrfs_inode_size(l, ii),
btrfs_inode_mode(l, ii));
break;
case BTRFS_DIR_ITEM_KEY:
di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(l, di, &found_key);
printk(KERN_INFO "\t\tdir oid %llu type %u\n",
found_key.objectid,
btrfs_dir_type(l, di));
break;
case BTRFS_ROOT_ITEM_KEY:
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
btrfs_disk_root_bytenr(l, ri),
btrfs_disk_root_refs(l, ri));
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
print_extent_item(l, i, type);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
printk(KERN_INFO "\t\ttree block backref\n");
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
printk(KERN_INFO "\t\tshared block backref\n");
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = btrfs_item_ptr(l, i,
struct btrfs_extent_data_ref);
print_extent_data_ref(l, dref);
break;
case BTRFS_SHARED_DATA_REF_KEY:
sref = btrfs_item_ptr(l, i,
struct btrfs_shared_data_ref);
printk(KERN_INFO "\t\tshared data backref count %u\n",
btrfs_shared_data_ref_count(l, sref));
break;
case BTRFS_EXTENT_DATA_KEY:
fi = btrfs_item_ptr(l, i,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
printk(KERN_INFO "\t\tinline extent data "
"size %u\n",
btrfs_file_extent_inline_len(l, i, fi));
break;
}
printk(KERN_INFO "\t\textent data disk bytenr %llu "
"nr %llu\n",
btrfs_file_extent_disk_bytenr(l, fi),
btrfs_file_extent_disk_num_bytes(l, fi));
printk(KERN_INFO "\t\textent data offset %llu "
"nr %llu ram %llu\n",
btrfs_file_extent_offset(l, fi),
btrfs_file_extent_num_bytes(l, fi),
btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_EXTENT_REF_V0_KEY:
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
print_extent_ref_v0(l, i);
#else
BUG();
#endif
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
printk(KERN_INFO "\t\tblock group used %llu\n",
btrfs_disk_block_group_used(l, bi));
break;
case BTRFS_CHUNK_ITEM_KEY:
print_chunk(l, btrfs_item_ptr(l, i,
struct btrfs_chunk));
break;
case BTRFS_DEV_ITEM_KEY:
print_dev_item(l, btrfs_item_ptr(l, i,
struct btrfs_dev_item));
break;
case BTRFS_DEV_EXTENT_KEY:
dev_extent = btrfs_item_ptr(l, i,
struct btrfs_dev_extent);
printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
"\t\tchunk objectid %llu chunk offset %llu "
"length %llu\n",
btrfs_dev_extent_chunk_tree(l, dev_extent),
btrfs_dev_extent_chunk_objectid(l, dev_extent),
btrfs_dev_extent_chunk_offset(l, dev_extent),
btrfs_dev_extent_length(l, dev_extent));
break;
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
break;
case BTRFS_UUID_KEY_SUBVOL:
case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
print_uuid_item(l, btrfs_item_ptr_offset(l, i),
btrfs_item_size_nr(l, i));
break;
};
}
}
void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
{
int i; u32 nr;
struct btrfs_key key;
int level;
if (!c)
return;
nr = btrfs_header_nritems(c);
level = btrfs_header_level(c);
if (level == 0) {
btrfs_print_leaf(root, c);
return;
}
btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
btrfs_header_bytenr(c), level, nr,
(u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
i, key.objectid, key.type, key.offset,
btrfs_node_blockptr(c, i));
}
for (i = 0; i < nr; i++) {
struct extent_buffer *next = read_tree_block(root,
btrfs_node_blockptr(c, i),
btrfs_node_ptr_generation(c, i));
if (btrfs_is_leaf(next) &&
level != 1)
BUG();
if (btrfs_header_level(next) !=
level - 1)
BUG();
btrfs_print_tree(root, next);
free_extent_buffer(next);
}
}

23
fs/btrfs/print-tree.h Normal file
View file

@ -0,0 +1,23 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __PRINT_TREE_
#define __PRINT_TREE_
void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c);
#endif

427
fs/btrfs/props.c Normal file
View file

@ -0,0 +1,427 @@
/*
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/hashtable.h>
#include "props.h"
#include "btrfs_inode.h"
#include "hash.h"
#include "transaction.h"
#include "xattr.h"
#define BTRFS_PROP_HANDLERS_HT_BITS 8
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
struct prop_handler {
struct hlist_node node;
const char *xattr_name;
int (*validate)(const char *value, size_t len);
int (*apply)(struct inode *inode, const char *value, size_t len);
const char *(*extract)(struct inode *inode);
int inheritable;
};
static int prop_compression_validate(const char *value, size_t len);
static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len);
static const char *prop_compression_extract(struct inode *inode);
static struct prop_handler prop_handlers[] = {
{
.xattr_name = XATTR_BTRFS_PREFIX "compression",
.validate = prop_compression_validate,
.apply = prop_compression_apply,
.extract = prop_compression_extract,
.inheritable = 1
},
{
.xattr_name = NULL
}
};
void __init btrfs_props_init(void)
{
struct prop_handler *p;
hash_init(prop_handlers_ht);
for (p = &prop_handlers[0]; p->xattr_name; p++) {
u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
hash_add(prop_handlers_ht, &p->node, h);
}
}
static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
{
struct hlist_head *h;
h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
if (hlist_empty(h))
return NULL;
return h;
}
static const struct prop_handler *
find_prop_handler(const char *name,
const struct hlist_head *handlers)
{
struct prop_handler *h;
if (!handlers) {
u64 hash = btrfs_name_hash(name, strlen(name));
handlers = find_prop_handlers_by_hash(hash);
if (!handlers)
return NULL;
}
hlist_for_each_entry(h, handlers, node)
if (!strcmp(h->xattr_name, name))
return h;
return NULL;
}
static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
struct inode *inode,
const char *name,
const char *value,
size_t value_len,
int flags)
{
const struct prop_handler *handler;
int ret;
if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
return -EINVAL;
handler = find_prop_handler(name, NULL);
if (!handler)
return -EINVAL;
if (value_len == 0) {
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
NULL, 0, flags);
if (ret)
return ret;
ret = handler->apply(inode, NULL, 0);
ASSERT(ret == 0);
return ret;
}
ret = handler->validate(value, value_len);
if (ret)
return ret;
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
value, value_len, flags);
if (ret)
return ret;
ret = handler->apply(inode, value, value_len);
if (ret) {
__btrfs_setxattr(trans, inode, handler->xattr_name,
NULL, 0, flags);
return ret;
}
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
return 0;
}
int btrfs_set_prop(struct inode *inode,
const char *name,
const char *value,
size_t value_len,
int flags)
{
return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
}
static int iterate_object_props(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid,
void (*iterator)(void *,
const struct prop_handler *,
const char *,
size_t),
void *ctx)
{
int ret;
char *name_buf = NULL;
char *value_buf = NULL;
int name_buf_len = 0;
int value_buf_len = 0;
while (1) {
struct btrfs_key key;
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
u32 total_len, cur, this_len;
int slot;
const struct hlist_head *handlers;
slot = path->slots[0];
leaf = path->nodes[0];
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
break;
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != objectid)
break;
if (key.type != BTRFS_XATTR_ITEM_KEY)
break;
handlers = find_prop_handlers_by_hash(key.offset);
if (!handlers)
goto next_slot;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
cur = 0;
total_len = btrfs_item_size_nr(leaf, slot);
while (cur < total_len) {
u32 name_len = btrfs_dir_name_len(leaf, di);
u32 data_len = btrfs_dir_data_len(leaf, di);
unsigned long name_ptr, data_ptr;
const struct prop_handler *handler;
this_len = sizeof(*di) + name_len + data_len;
name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len;
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr,
XATTR_BTRFS_PREFIX_LEN))
goto next_dir_item;
if (name_len >= name_buf_len) {
kfree(name_buf);
name_buf_len = name_len + 1;
name_buf = kmalloc(name_buf_len, GFP_NOFS);
if (!name_buf) {
ret = -ENOMEM;
goto out;
}
}
read_extent_buffer(leaf, name_buf, name_ptr, name_len);
name_buf[name_len] = '\0';
handler = find_prop_handler(name_buf, handlers);
if (!handler)
goto next_dir_item;
if (data_len > value_buf_len) {
kfree(value_buf);
value_buf_len = data_len;
value_buf = kmalloc(data_len, GFP_NOFS);
if (!value_buf) {
ret = -ENOMEM;
goto out;
}
}
read_extent_buffer(leaf, value_buf, data_ptr, data_len);
iterator(ctx, handler, value_buf, data_len);
next_dir_item:
cur += this_len;
di = (struct btrfs_dir_item *)((char *) di + this_len);
}
next_slot:
path->slots[0]++;
}
ret = 0;
out:
btrfs_release_path(path);
kfree(name_buf);
kfree(value_buf);
return ret;
}
static void inode_prop_iterator(void *ctx,
const struct prop_handler *handler,
const char *value,
size_t len)
{
struct inode *inode = ctx;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
ret = handler->apply(inode, value, len);
if (unlikely(ret))
btrfs_warn(root->fs_info,
"error applying prop %s to ino %llu (root %llu): %d",
handler->xattr_name, btrfs_ino(inode),
root->root_key.objectid, ret);
else
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
}
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 ino = btrfs_ino(inode);
int ret;
ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
return ret;
}
static int inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *parent)
{
const struct prop_handler *h;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
if (!test_bit(BTRFS_INODE_HAS_PROPS,
&BTRFS_I(parent)->runtime_flags))
return 0;
for (h = &prop_handlers[0]; h->xattr_name; h++) {
const char *value;
u64 num_bytes;
if (!h->inheritable)
continue;
value = h->extract(parent);
if (!value)
continue;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
num_bytes, BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
ret = __btrfs_set_prop(trans, inode, h->xattr_name,
value, strlen(value), 0);
btrfs_block_rsv_release(root, trans->block_rsv, num_bytes);
if (ret)
goto out;
}
ret = 0;
out:
return ret;
}
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *dir)
{
if (!dir)
return 0;
return inherit_props(trans, inode, dir);
}
int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *parent_root)
{
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
parent_root, NULL);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
}
ret = inherit_props(trans, child_inode, parent_inode);
iput(child_inode);
iput(parent_inode);
return ret;
}
static int prop_compression_validate(const char *value, size_t len)
{
if (!strncmp("lzo", value, len))
return 0;
else if (!strncmp("zlib", value, len))
return 0;
return -EINVAL;
}
static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len)
{
int type;
if (len == 0) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
return 0;
}
if (!strncmp("lzo", value, len))
type = BTRFS_COMPRESS_LZO;
else if (!strncmp("zlib", value, len))
type = BTRFS_COMPRESS_ZLIB;
else
return -EINVAL;
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
BTRFS_I(inode)->force_compress = type;
return 0;
}
static const char *prop_compression_extract(struct inode *inode)
{
switch (BTRFS_I(inode)->force_compress) {
case BTRFS_COMPRESS_ZLIB:
return "zlib";
case BTRFS_COMPRESS_LZO:
return "lzo";
}
return NULL;
}

42
fs/btrfs/props.h Normal file
View file

@ -0,0 +1,42 @@
/*
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_PROPS_H
#define __BTRFS_PROPS_H
#include "ctree.h"
void __init btrfs_props_init(void);
int btrfs_set_prop(struct inode *inode,
const char *name,
const char *value,
size_t value_len,
int flags);
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *dir);
int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *parent_root);
#endif

2827
fs/btrfs/qgroup.c Normal file

File diff suppressed because it is too large Load diff

108
fs/btrfs/qgroup.h Normal file
View file

@ -0,0 +1,108 @@
/*
* Copyright (C) 2014 Facebook. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__
/*
* A description of the operations, all of these operations only happen when we
* are adding the 1st reference for that subvolume in the case of adding space
* or on the last reference delete in the case of subtraction. The only
* exception is the last one, which is added for confusion.
*
* BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
* one pointing at the bytes we are adding. This is called on the first
* allocation.
*
* BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
* shared between subvols. This is called on the creation of a ref that already
* has refs from a different subvolume, so basically reflink.
*
* BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
* one referencing the range.
*
* BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
* refs with other subvolumes.
*/
enum btrfs_qgroup_operation_type {
BTRFS_QGROUP_OPER_ADD_EXCL,
BTRFS_QGROUP_OPER_ADD_SHARED,
BTRFS_QGROUP_OPER_SUB_EXCL,
BTRFS_QGROUP_OPER_SUB_SHARED,
BTRFS_QGROUP_OPER_SUB_SUBTREE,
};
struct btrfs_qgroup_operation {
u64 ref_root;
u64 bytenr;
u64 num_bytes;
u64 seq;
enum btrfs_qgroup_operation_type type;
struct seq_list elem;
struct rb_node n;
struct list_head list;
};
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 ref_root,
u64 bytenr, u64 num_bytes,
enum btrfs_qgroup_operation_type type,
int mod_seq);
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl);
#endif
#endif /* __BTRFS_QGROUP__ */

2100
fs/btrfs/raid56.c Normal file

File diff suppressed because it is too large Load diff

51
fs/btrfs/raid56.h Normal file
View file

@ -0,0 +1,51 @@
/*
* Copyright (C) 2012 Fusion-io All rights reserved.
* Copyright (C) 2012 Intel Corp. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_RAID56__
#define __BTRFS_RAID56__
static inline int nr_parity_stripes(struct map_lookup *map)
{
if (map->type & BTRFS_BLOCK_GROUP_RAID5)
return 1;
else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
return 2;
else
return 0;
}
static inline int nr_data_stripes(struct map_lookup *map)
{
return map->num_stripes - nr_parity_stripes(map);
}
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)
#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \
((x) == RAID6_Q_STRIPE))
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len, int mirror_num);
int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
#endif

56
fs/btrfs/rcu-string.h Normal file
View file

@ -0,0 +1,56 @@
/*
* Copyright (C) 2012 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
struct rcu_string {
struct rcu_head rcu;
char str[0];
};
static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
{
size_t len = strlen(src) + 1;
struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
(len * sizeof(char)), mask);
if (!ret)
return ret;
strncpy(ret->str, src, len);
return ret;
}
static inline void rcu_string_free(struct rcu_string *str)
{
if (str)
kfree_rcu(str, rcu);
}
#define printk_in_rcu(fmt, ...) do { \
rcu_read_lock(); \
printk(fmt, __VA_ARGS__); \
rcu_read_unlock(); \
} while (0)
#define printk_ratelimited_in_rcu(fmt, ...) do { \
rcu_read_lock(); \
printk_ratelimited(fmt, __VA_ARGS__); \
rcu_read_unlock(); \
} while (0)
#define rcu_str_deref(rcu_str) ({ \
struct rcu_string *__str = rcu_dereference(rcu_str); \
__str->str; \
})

995
fs/btrfs/reada.c Normal file
View file

@ -0,0 +1,995 @@
/*
* Copyright (C) 2011 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "transaction.h"
#include "dev-replace.h"
#undef DEBUG
/*
* This is the implementation for the generic read ahead framework.
*
* To trigger a readahead, btrfs_reada_add must be called. It will start
* a read ahead for the given range [start, end) on tree root. The returned
* handle can either be used to wait on the readahead to finish
* (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
*
* The read ahead works as follows:
* On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
* reada_start_machine will then search for extents to prefetch and trigger
* some reads. When a read finishes for a node, all contained node/leaf
* pointers that lie in the given range will also be enqueued. The reads will
* be triggered in sequential order, thus giving a big win over a naive
* enumeration. It will also make use of multi-device layouts. Each disk
* will have its on read pointer and all disks will by utilized in parallel.
* Also will no two disks read both sides of a mirror simultaneously, as this
* would waste seeking capacity. Instead both disks will read different parts
* of the filesystem.
* Any number of readaheads can be started in parallel. The read order will be
* determined globally, i.e. 2 parallel readaheads will normally finish faster
* than the 2 started one after another.
*/
#define MAX_IN_FLIGHT 6
struct reada_extctl {
struct list_head list;
struct reada_control *rc;
u64 generation;
};
struct reada_extent {
u64 logical;
struct btrfs_key top;
u32 blocksize;
int err;
struct list_head extctl;
int refcnt;
spinlock_t lock;
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
struct btrfs_device *scheduled_for;
};
struct reada_zone {
u64 start;
u64 end;
u64 elems;
struct list_head list;
spinlock_t lock;
int locked;
struct btrfs_device *device;
struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
* self */
int ndevs;
struct kref refcnt;
};
struct reada_machine_work {
struct btrfs_work work;
struct btrfs_fs_info *fs_info;
};
static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *);
static void reada_control_release(struct kref *kref);
static void reada_zone_release(struct kref *kref);
static void reada_start_machine(struct btrfs_fs_info *fs_info);
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
static int reada_add_block(struct reada_control *rc, u64 logical,
struct btrfs_key *top, int level, u64 generation);
/* recurses */
/* in case of err, eb might be NULL */
static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err)
{
int level = 0;
int nritems;
int i;
u64 bytenr;
u64 generation;
struct reada_extent *re;
struct btrfs_fs_info *fs_info = root->fs_info;
struct list_head list;
unsigned long index = start >> PAGE_CACHE_SHIFT;
struct btrfs_device *for_dev;
if (eb)
level = btrfs_header_level(eb);
/* find extent */
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
re->refcnt++;
spin_unlock(&fs_info->reada_lock);
if (!re)
return -1;
spin_lock(&re->lock);
/*
* just take the full list from the extent. afterwards we
* don't need the lock anymore
*/
list_replace_init(&re->extctl, &list);
for_dev = re->scheduled_for;
re->scheduled_for = NULL;
spin_unlock(&re->lock);
if (err == 0) {
nritems = level ? btrfs_header_nritems(eb) : 0;
generation = btrfs_header_generation(eb);
/*
* FIXME: currently we just set nritems to 0 if this is a leaf,
* effectively ignoring the content. In a next step we could
* trigger more readahead depending from the content, e.g.
* fetch the checksums for the extents in the leaf.
*/
} else {
/*
* this is the error case, the extent buffer has not been
* read correctly. We won't access anything from it and
* just cleanup our data structures. Effectively this will
* cut the branch below this node from read ahead.
*/
nritems = 0;
generation = 0;
}
for (i = 0; i < nritems; i++) {
struct reada_extctl *rec;
u64 n_gen;
struct btrfs_key key;
struct btrfs_key next_key;
btrfs_node_key_to_cpu(eb, &key, i);
if (i + 1 < nritems)
btrfs_node_key_to_cpu(eb, &next_key, i + 1);
else
next_key = re->top;
bytenr = btrfs_node_blockptr(eb, i);
n_gen = btrfs_node_ptr_generation(eb, i);
list_for_each_entry(rec, &list, list) {
struct reada_control *rc = rec->rc;
/*
* if the generation doesn't match, just ignore this
* extctl. This will probably cut off a branch from
* prefetch. Alternatively one could start a new (sub-)
* prefetch for this branch, starting again from root.
* FIXME: move the generation check out of this loop
*/
#ifdef DEBUG
if (rec->generation != generation) {
btrfs_debug(root->fs_info,
"generation mismatch for (%llu,%d,%llu) %llu != %llu",
key.objectid, key.type, key.offset,
rec->generation, generation);
}
#endif
if (rec->generation == generation &&
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
reada_add_block(rc, bytenr, &next_key,
level - 1, n_gen);
}
}
/*
* free extctl records
*/
while (!list_empty(&list)) {
struct reada_control *rc;
struct reada_extctl *rec;
rec = list_first_entry(&list, struct reada_extctl, list);
list_del(&rec->list);
rc = rec->rc;
kfree(rec);
kref_get(&rc->refcnt);
if (atomic_dec_and_test(&rc->elems)) {
kref_put(&rc->refcnt, reada_control_release);
wake_up(&rc->wait);
}
kref_put(&rc->refcnt, reada_control_release);
reada_extent_put(fs_info, re); /* one ref for each entry */
}
reada_extent_put(fs_info, re); /* our ref */
if (for_dev)
atomic_dec(&for_dev->reada_in_flight);
return 0;
}
/*
* start is passed separately in case eb in NULL, which may be the case with
* failed I/O
*/
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err)
{
int ret;
ret = __readahead_hook(root, eb, start, err);
reada_start_machine(root->fs_info);
return ret;
}
static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev, u64 logical,
struct btrfs_bio *bbio)
{
int ret;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
int i;
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
logical >> PAGE_CACHE_SHIFT, 1);
if (ret == 1)
kref_get(&zone->refcnt);
spin_unlock(&fs_info->reada_lock);
if (ret == 1) {
if (logical >= zone->start && logical < zone->end)
return zone;
spin_lock(&fs_info->reada_lock);
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
start = cache->key.objectid;
end = start + cache->key.offset - 1;
btrfs_put_block_group(cache);
zone = kzalloc(sizeof(*zone), GFP_NOFS);
if (!zone)
return NULL;
zone->start = start;
zone->end = end;
INIT_LIST_HEAD(&zone->list);
spin_lock_init(&zone->lock);
zone->locked = 0;
kref_init(&zone->refcnt);
zone->elems = 0;
zone->device = dev; /* our device always sits at index 0 */
for (i = 0; i < bbio->num_stripes; ++i) {
/* bounds have already been checked */
zone->devs[i] = bbio->stripes[i].dev;
}
zone->ndevs = bbio->num_stripes;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
if (ret == -EEXIST) {
kfree(zone);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
logical >> PAGE_CACHE_SHIFT, 1);
if (ret == 1)
kref_get(&zone->refcnt);
}
spin_unlock(&fs_info->reada_lock);
return zone;
}
static struct reada_extent *reada_find_extent(struct btrfs_root *root,
u64 logical,
struct btrfs_key *top, int level)
{
int ret;
struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
re->refcnt++;
spin_unlock(&fs_info->reada_lock);
if (re)
return re;
re = kzalloc(sizeof(*re), GFP_NOFS);
if (!re)
return NULL;
blocksize = root->nodesize;
re->logical = logical;
re->blocksize = blocksize;
re->top = *top;
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
re->refcnt = 1;
/*
* map block
*/
length = blocksize;
ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
&bbio, 0);
if (ret || !bbio || length < blocksize)
goto error;
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
btrfs_err(root->fs_info,
"readahead: more than %d copies not supported",
BTRFS_MAX_MIRRORS);
goto error;
}
for (nzones = 0; nzones < bbio->num_stripes; ++nzones) {
struct reada_zone *zone;
dev = bbio->stripes[nzones].dev;
zone = reada_find_zone(fs_info, dev, logical, bbio);
if (!zone)
break;
re->zones[nzones] = zone;
spin_lock(&zone->lock);
if (!zone->elems)
kref_get(&zone->refcnt);
++zone->elems;
spin_unlock(&zone->lock);
spin_lock(&fs_info->reada_lock);
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
re->nzones = nzones;
if (nzones == 0) {
/* not a single zone found, error and out */
goto error;
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
if (dev == prev_dev) {
/*
* in case of DUP, just add the first zone. As both
* are on the same device, there's nothing to gain
* from adding both.
* Also, it wouldn't work, as the tree is per device
* and adding would fail with EEXIST
*/
continue;
}
if (!dev->bdev) {
/*
* cannot read ahead on missing device, but for RAID5/6,
* REQ_GET_READ_MIRRORS return 1. So don't skip missing
* device for such case.
*/
if (nzones > 1)
continue;
}
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
* as this device is selected for reading only as
* a last resort, skip it for read ahead.
*/
continue;
}
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
while (--i >= 0) {
dev = bbio->stripes[i].dev;
BUG_ON(dev == NULL);
/* ignore whether the entry was inserted */
radix_tree_delete(&dev->reada_extents, index);
}
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio);
return re;
error:
while (nzones) {
struct reada_zone *zone;
--nzones;
zone = re->zones[nzones];
kref_get(&zone->refcnt);
spin_lock(&zone->lock);
--zone->elems;
if (zone->elems == 0) {
/*
* no fs_info->reada_lock needed, as this can't be
* the last ref
*/
kref_put(&zone->refcnt, reada_zone_release);
}
spin_unlock(&zone->lock);
spin_lock(&fs_info->reada_lock);
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
kfree(bbio);
kfree(re);
return re_exist;
}
static void reada_extent_put(struct btrfs_fs_info *fs_info,
struct reada_extent *re)
{
int i;
unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
spin_lock(&fs_info->reada_lock);
if (--re->refcnt) {
spin_unlock(&fs_info->reada_lock);
return;
}
radix_tree_delete(&fs_info->reada_tree, index);
for (i = 0; i < re->nzones; ++i) {
struct reada_zone *zone = re->zones[i];
radix_tree_delete(&zone->device->reada_extents, index);
}
spin_unlock(&fs_info->reada_lock);
for (i = 0; i < re->nzones; ++i) {
struct reada_zone *zone = re->zones[i];
kref_get(&zone->refcnt);
spin_lock(&zone->lock);
--zone->elems;
if (zone->elems == 0) {
/* no fs_info->reada_lock needed, as this can't be
* the last ref */
kref_put(&zone->refcnt, reada_zone_release);
}
spin_unlock(&zone->lock);
spin_lock(&fs_info->reada_lock);
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
if (re->scheduled_for)
atomic_dec(&re->scheduled_for->reada_in_flight);
kfree(re);
}
static void reada_zone_release(struct kref *kref)
{
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
radix_tree_delete(&zone->device->reada_zones,
zone->end >> PAGE_CACHE_SHIFT);
kfree(zone);
}
static void reada_control_release(struct kref *kref)
{
struct reada_control *rc = container_of(kref, struct reada_control,
refcnt);
kfree(rc);
}
static int reada_add_block(struct reada_control *rc, u64 logical,
struct btrfs_key *top, int level, u64 generation)
{
struct btrfs_root *root = rc->root;
struct reada_extent *re;
struct reada_extctl *rec;
re = reada_find_extent(root, logical, top, level); /* takes one ref */
if (!re)
return -1;
rec = kzalloc(sizeof(*rec), GFP_NOFS);
if (!rec) {
reada_extent_put(root->fs_info, re);
return -1;
}
rec->rc = rc;
rec->generation = generation;
atomic_inc(&rc->elems);
spin_lock(&re->lock);
list_add_tail(&rec->list, &re->extctl);
spin_unlock(&re->lock);
/* leave the ref on the extent */
return 0;
}
/*
* called with fs_info->reada_lock held
*/
static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
{
int i;
unsigned long index = zone->end >> PAGE_CACHE_SHIFT;
for (i = 0; i < zone->ndevs; ++i) {
struct reada_zone *peer;
peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index);
if (peer && peer->device != zone->device)
peer->locked = lock;
}
}
/*
* called with fs_info->reada_lock held
*/
static int reada_pick_zone(struct btrfs_device *dev)
{
struct reada_zone *top_zone = NULL;
struct reada_zone *top_locked_zone = NULL;
u64 top_elems = 0;
u64 top_locked_elems = 0;
unsigned long index = 0;
int ret;
if (dev->reada_curr_zone) {
reada_peer_zones_set_lock(dev->reada_curr_zone, 0);
kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release);
dev->reada_curr_zone = NULL;
}
/* pick the zone with the most elements */
while (1) {
struct reada_zone *zone;
ret = radix_tree_gang_lookup(&dev->reada_zones,
(void **)&zone, index, 1);
if (ret == 0)
break;
index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
if (zone->locked) {
if (zone->elems > top_locked_elems) {
top_locked_elems = zone->elems;
top_locked_zone = zone;
}
} else {
if (zone->elems > top_elems) {
top_elems = zone->elems;
top_zone = zone;
}
}
}
if (top_zone)
dev->reada_curr_zone = top_zone;
else if (top_locked_zone)
dev->reada_curr_zone = top_locked_zone;
else
return 0;
dev->reada_next = dev->reada_curr_zone->start;
kref_get(&dev->reada_curr_zone->refcnt);
reada_peer_zones_set_lock(dev->reada_curr_zone, 1);
return 1;
}
static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev)
{
struct reada_extent *re = NULL;
int mirror_num = 0;
struct extent_buffer *eb = NULL;
u64 logical;
u32 blocksize;
int ret;
int i;
int need_kick = 0;
spin_lock(&fs_info->reada_lock);
if (dev->reada_curr_zone == NULL) {
ret = reada_pick_zone(dev);
if (!ret) {
spin_unlock(&fs_info->reada_lock);
return 0;
}
}
/*
* FIXME currently we issue the reads one extent at a time. If we have
* a contiguous block of extents, we could also coagulate them or use
* plugging to speed things up
*/
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
dev->reada_next >> PAGE_CACHE_SHIFT, 1);
if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
ret = reada_pick_zone(dev);
if (!ret) {
spin_unlock(&fs_info->reada_lock);
return 0;
}
re = NULL;
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
dev->reada_next >> PAGE_CACHE_SHIFT, 1);
}
if (ret == 0) {
spin_unlock(&fs_info->reada_lock);
return 0;
}
dev->reada_next = re->logical + re->blocksize;
re->refcnt++;
spin_unlock(&fs_info->reada_lock);
/*
* find mirror num
*/
for (i = 0; i < re->nzones; ++i) {
if (re->zones[i]->device == dev) {
mirror_num = i + 1;
break;
}
}
logical = re->logical;
blocksize = re->blocksize;
spin_lock(&re->lock);
if (re->scheduled_for == NULL) {
re->scheduled_for = dev;
need_kick = 1;
}
spin_unlock(&re->lock);
reada_extent_put(fs_info, re);
if (!need_kick)
return 0;
atomic_inc(&dev->reada_in_flight);
ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize,
mirror_num, &eb);
if (ret)
__readahead_hook(fs_info->extent_root, NULL, logical, ret);
else if (eb)
__readahead_hook(fs_info->extent_root, eb, eb->start, ret);
if (eb)
free_extent_buffer(eb);
return 1;
}
static void reada_start_machine_worker(struct btrfs_work *work)
{
struct reada_machine_work *rmw;
struct btrfs_fs_info *fs_info;
int old_ioprio;
rmw = container_of(work, struct reada_machine_work, work);
fs_info = rmw->fs_info;
kfree(rmw);
old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
task_nice_ioprio(current));
set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
set_task_ioprio(current, old_ioprio);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 enqueued;
u64 total = 0;
int i;
do {
enqueued = 0;
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (atomic_read(&device->reada_in_flight) <
MAX_IN_FLIGHT)
enqueued += reada_start_machine_dev(fs_info,
device);
}
total += enqueued;
} while (enqueued && total < 10000);
if (enqueued == 0)
return;
/*
* If everything is already in the cache, this is effectively single
* threaded. To a) not hold the caller for too long and b) to utilize
* more cores, we broke the loop above after 10000 iterations and now
* enqueue to workers to finish it. This will distribute the load to
* the cores.
*/
for (i = 0; i < 2; ++i)
reada_start_machine(fs_info);
}
static void reada_start_machine(struct btrfs_fs_info *fs_info)
{
struct reada_machine_work *rmw;
rmw = kzalloc(sizeof(*rmw), GFP_NOFS);
if (!rmw) {
/* FIXME we cannot handle this properly right now */
BUG();
}
btrfs_init_work(&rmw->work, btrfs_readahead_helper,
reada_start_machine_worker, NULL, NULL);
rmw->fs_info = fs_info;
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
}
#ifdef DEBUG
static void dump_devs(struct btrfs_fs_info *fs_info, int all)
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
unsigned long index;
int ret;
int i;
int j;
int cnt;
spin_lock(&fs_info->reada_lock);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid,
atomic_read(&device->reada_in_flight));
index = 0;
while (1) {
struct reada_zone *zone;
ret = radix_tree_gang_lookup(&device->reada_zones,
(void **)&zone, index, 1);
if (ret == 0)
break;
printk(KERN_DEBUG " zone %llu-%llu elems %llu locked "
"%d devs", zone->start, zone->end, zone->elems,
zone->locked);
for (j = 0; j < zone->ndevs; ++j) {
printk(KERN_CONT " %lld",
zone->devs[j]->devid);
}
if (device->reada_curr_zone == zone)
printk(KERN_CONT " curr off %llu",
device->reada_next - zone->start);
printk(KERN_CONT "\n");
index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
}
cnt = 0;
index = 0;
while (all) {
struct reada_extent *re = NULL;
ret = radix_tree_gang_lookup(&device->reada_extents,
(void **)&re, index, 1);
if (ret == 0)
break;
printk(KERN_DEBUG
" re: logical %llu size %u empty %d for %lld",
re->logical, re->blocksize,
list_empty(&re->extctl), re->scheduled_for ?
re->scheduled_for->devid : -1);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
re->zones[i]->start,
re->zones[i]->end);
for (j = 0; j < re->zones[i]->ndevs; ++j) {
printk(KERN_CONT " %lld",
re->zones[i]->devs[j]->devid);
}
}
printk(KERN_CONT "\n");
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
if (++cnt > 15)
break;
}
}
index = 0;
cnt = 0;
while (all) {
struct reada_extent *re = NULL;
ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re,
index, 1);
if (ret == 0)
break;
if (!re->scheduled_for) {
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
continue;
}
printk(KERN_DEBUG
"re: logical %llu size %u list empty %d for %lld",
re->logical, re->blocksize, list_empty(&re->extctl),
re->scheduled_for ? re->scheduled_for->devid : -1);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
re->zones[i]->start,
re->zones[i]->end);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
re->zones[i]->start,
re->zones[i]->end);
for (j = 0; j < re->zones[i]->ndevs; ++j) {
printk(KERN_CONT " %lld",
re->zones[i]->devs[j]->devid);
}
}
}
printk(KERN_CONT "\n");
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
}
spin_unlock(&fs_info->reada_lock);
}
#endif
/*
* interface
*/
struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *key_start, struct btrfs_key *key_end)
{
struct reada_control *rc;
u64 start;
u64 generation;
int level;
struct extent_buffer *node;
static struct btrfs_key max_key = {
.objectid = (u64)-1,
.type = (u8)-1,
.offset = (u64)-1
};
rc = kzalloc(sizeof(*rc), GFP_NOFS);
if (!rc)
return ERR_PTR(-ENOMEM);
rc->root = root;
rc->key_start = *key_start;
rc->key_end = *key_end;
atomic_set(&rc->elems, 0);
init_waitqueue_head(&rc->wait);
kref_init(&rc->refcnt);
kref_get(&rc->refcnt); /* one ref for having elements */
node = btrfs_root_node(root);
start = node->start;
level = btrfs_header_level(node);
generation = btrfs_header_generation(node);
free_extent_buffer(node);
if (reada_add_block(rc, start, &max_key, level, generation)) {
kfree(rc);
return ERR_PTR(-ENOMEM);
}
reada_start_machine(root->fs_info);
return rc;
}
#ifdef DEBUG
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
while (atomic_read(&rc->elems)) {
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
5 * HZ);
dump_devs(rc->root->fs_info,
atomic_read(&rc->elems) < 10 ? 1 : 0);
}
dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
kref_put(&rc->refcnt, reada_control_release);
return 0;
}
#else
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
while (atomic_read(&rc->elems)) {
wait_event(rc->wait, atomic_read(&rc->elems) == 0);
}
kref_put(&rc->refcnt, reada_control_release);
return 0;
}
#endif
void btrfs_reada_detach(void *handle)
{
struct reada_control *rc = handle;
kref_put(&rc->refcnt, reada_control_release);
}

4655
fs/btrfs/relocation.c Normal file

File diff suppressed because it is too large Load diff

497
fs/btrfs/root-tree.c Normal file
View file

@ -0,0 +1,497 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/err.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
/*
* Read a root item from the tree. In case we detect a root item smaller then
* sizeof(root_item), we know it's an old version of the root structure and
* initialize all new fields to zero. The same happens if we detect mismatching
* generation numbers as then we know the root was once mounted with an older
* kernel that was not aware of the root item structure change.
*/
static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
struct btrfs_root_item *item)
{
uuid_le uuid;
int len;
int need_reset = 0;
len = btrfs_item_size_nr(eb, slot);
read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
min_t(int, len, (int)sizeof(*item)));
if (len < sizeof(*item))
need_reset = 1;
if (!need_reset && btrfs_root_generation(item)
!= btrfs_root_generation_v2(item)) {
if (btrfs_root_generation_v2(item) != 0) {
printk(KERN_WARNING "BTRFS: mismatching "
"generation and generation_v2 "
"found in root item. This root "
"was probably mounted with an "
"older kernel. Resetting all "
"new fields.\n");
}
need_reset = 1;
}
if (need_reset) {
memset(&item->generation_v2, 0,
sizeof(*item) - offsetof(struct btrfs_root_item,
generation_v2));
uuid_le_gen(&uuid);
memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE);
}
}
/*
* btrfs_find_root - lookup the root by the key.
* root: the root of the root tree
* search_key: the key to search
* path: the path we search
* root_item: the root item of the tree we look for
* root_key: the reak key of the tree we look for
*
* If ->offset of 'seach_key' is -1ULL, it means we are not sure the offset
* of the search key, just lookup the root with the highest offset for a
* given objectid.
*
* If we find something return 0, otherwise > 0, < 0 on error.
*/
int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key)
{
struct btrfs_key found_key;
struct extent_buffer *l;
int ret;
int slot;
ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0);
if (ret < 0)
return ret;
if (search_key->offset != -1ULL) { /* the search key is exact */
if (ret > 0)
goto out;
} else {
BUG_ON(ret == 0); /* Logical error */
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
ret = 0;
}
l = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
if (found_key.objectid != search_key->objectid ||
found_key.type != BTRFS_ROOT_ITEM_KEY) {
ret = 1;
goto out;
}
if (root_item)
btrfs_read_root_item(l, slot, root_item);
if (root_key)
memcpy(root_key, &found_key, sizeof(found_key));
out:
btrfs_release_path(path);
return ret;
}
void btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node)
{
btrfs_set_root_bytenr(item, node->start);
btrfs_set_root_level(item, btrfs_header_level(node));
btrfs_set_root_generation(item, btrfs_header_generation(node));
}
/*
* copy the data in 'item' into the btree
*/
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_root_item
*item)
{
struct btrfs_path *path;
struct extent_buffer *l;
int ret;
int slot;
unsigned long ptr;
int old_len;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu",
key->objectid, key->type, key->offset);
BUG_ON(1);
}
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
old_len = btrfs_item_size_nr(l, slot);
/*
* If this is the first time we update the root item which originated
* from an older kernel, we need to enlarge the item size to make room
* for the added fields.
*/
if (old_len < sizeof(*item)) {
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
ret = btrfs_del_item(trans, root, path);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
}
/*
* Update generation_v2 so at the next mount we know the new root
* fields are valid.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
write_extent_buffer(l, item, ptr, sizeof(*item));
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_root_item *item)
{
/*
* Make sure generation v1 and v2 match. See update_root for details.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
return btrfs_insert_item(trans, root, key, item, sizeof(*item));
}
int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
{
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
bool can_recover = true;
if (tree_root->fs_info->sb->s_flags & MS_RDONLY)
can_recover = false;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
while (1) {
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
err = ret;
break;
}
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(tree_root, path);
if (ret < 0)
err = ret;
if (ret != 0)
break;
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
btrfs_release_path(path);
if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
root_key.objectid = key.offset;
key.offset++;
root = btrfs_read_fs_root(tree_root, &root_key);
err = PTR_ERR_OR_ZERO(root);
if (err && err != -ENOENT) {
break;
} else if (err == -ENOENT) {
struct btrfs_trans_handle *trans;
btrfs_release_path(path);
trans = btrfs_join_transaction(tree_root);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
btrfs_error(tree_root->fs_info, err,
"Failed to start trans to delete "
"orphan item");
break;
}
err = btrfs_del_orphan_item(trans, tree_root,
root_key.objectid);
btrfs_end_transaction(trans, tree_root);
if (err) {
btrfs_error(tree_root->fs_info, err,
"Failed to delete root orphan "
"item");
break;
}
continue;
}
err = btrfs_init_fs_root(root);
if (err) {
btrfs_free_fs_root(root);
break;
}
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
err = btrfs_insert_fs_root(root->fs_info, root);
if (err) {
BUG_ON(err == -EEXIST);
btrfs_free_fs_root(root);
break;
}
if (btrfs_root_refs(&root->root_item) == 0)
btrfs_add_dead_root(root);
}
btrfs_free_path(path);
return err;
}
/* drop the root item for 'key' from 'root' */
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key)
{
struct btrfs_path *path;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
goto out;
BUG_ON(ret != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len)
{
struct btrfs_path *path;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
unsigned long ptr;
int err = 0;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = root_id;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
BUG_ON(ret < 0);
if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
if (ret) {
err = ret;
goto out;
}
} else
err = -ENOENT;
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
btrfs_release_path(path);
key.objectid = ref_id;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = root_id;
goto again;
}
out:
btrfs_free_path(path);
return err;
}
/*
* add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
* or BTRFS_ROOT_BACKREF_KEY.
*
* The dirid, sequence, name and name_len refer to the directory entry
* that is referencing the root.
*
* For a forward ref, the root_id is the id of the tree referencing
* the root and ref_id is the id of the subvol or snapshot.
*
* For a back ref the root_id is the id of the subvol or snapshot and
* ref_id is the id of the tree referencing it.
*
* Will return 0, -ENOMEM, or anything from the CoW path
*/
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
const char *name, int name_len)
{
struct btrfs_key key;
int ret;
struct btrfs_path *path;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
unsigned long ptr;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = root_id;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
if (ret) {
btrfs_abort_transaction(trans, tree_root, ret);
btrfs_free_path(path);
return ret;
}
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
btrfs_set_root_ref_dirid(leaf, ref, dirid);
btrfs_set_root_ref_sequence(leaf, ref, sequence);
btrfs_set_root_ref_name_len(leaf, ref, name_len);
ptr = (unsigned long)(ref + 1);
write_extent_buffer(leaf, name, ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
btrfs_release_path(path);
key.objectid = ref_id;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = root_id;
goto again;
}
btrfs_free_path(path);
return 0;
}
/*
* Old btrfs forgets to init root_item->flags and root_item->byte_limit
* for subvolumes. To work around this problem, we steal a bit from
* root_item->inode_item->flags, and use it to indicate if those fields
* have been properly initialized.
*/
void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
{
u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
btrfs_set_root_flags(root_item, 0);
btrfs_set_root_limit(root_item, 0);
}
}
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root_item *item = &root->root_item;
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_item_lock);
btrfs_set_root_ctransid(item, trans->transid);
btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
spin_unlock(&root->root_item_lock);
}

3487
fs/btrfs/scrub.c Normal file

File diff suppressed because it is too large Load diff

5816
fs/btrfs/send.c Normal file

File diff suppressed because it is too large Load diff

134
fs/btrfs/send.h Normal file
View file

@ -0,0 +1,134 @@
/*
* Copyright (C) 2012 Alexander Block. All rights reserved.
* Copyright (C) 2012 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
#define BTRFS_SEND_STREAM_VERSION 1
#define BTRFS_SEND_BUF_SIZE (1024 * 64)
#define BTRFS_SEND_READ_SIZE (1024 * 48)
enum btrfs_tlv_type {
BTRFS_TLV_U8,
BTRFS_TLV_U16,
BTRFS_TLV_U32,
BTRFS_TLV_U64,
BTRFS_TLV_BINARY,
BTRFS_TLV_STRING,
BTRFS_TLV_UUID,
BTRFS_TLV_TIMESPEC,
};
struct btrfs_stream_header {
char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
__le32 version;
} __attribute__ ((__packed__));
struct btrfs_cmd_header {
/* len excluding the header */
__le32 len;
__le16 cmd;
/* crc including the header with zero crc field */
__le32 crc;
} __attribute__ ((__packed__));
struct btrfs_tlv_header {
__le16 tlv_type;
/* len excluding the header */
__le16 tlv_len;
} __attribute__ ((__packed__));
/* commands */
enum btrfs_send_cmd {
BTRFS_SEND_C_UNSPEC,
BTRFS_SEND_C_SUBVOL,
BTRFS_SEND_C_SNAPSHOT,
BTRFS_SEND_C_MKFILE,
BTRFS_SEND_C_MKDIR,
BTRFS_SEND_C_MKNOD,
BTRFS_SEND_C_MKFIFO,
BTRFS_SEND_C_MKSOCK,
BTRFS_SEND_C_SYMLINK,
BTRFS_SEND_C_RENAME,
BTRFS_SEND_C_LINK,
BTRFS_SEND_C_UNLINK,
BTRFS_SEND_C_RMDIR,
BTRFS_SEND_C_SET_XATTR,
BTRFS_SEND_C_REMOVE_XATTR,
BTRFS_SEND_C_WRITE,
BTRFS_SEND_C_CLONE,
BTRFS_SEND_C_TRUNCATE,
BTRFS_SEND_C_CHMOD,
BTRFS_SEND_C_CHOWN,
BTRFS_SEND_C_UTIMES,
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
__BTRFS_SEND_C_MAX,
};
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
/* attributes in send stream */
enum {
BTRFS_SEND_A_UNSPEC,
BTRFS_SEND_A_UUID,
BTRFS_SEND_A_CTRANSID,
BTRFS_SEND_A_INO,
BTRFS_SEND_A_SIZE,
BTRFS_SEND_A_MODE,
BTRFS_SEND_A_UID,
BTRFS_SEND_A_GID,
BTRFS_SEND_A_RDEV,
BTRFS_SEND_A_CTIME,
BTRFS_SEND_A_MTIME,
BTRFS_SEND_A_ATIME,
BTRFS_SEND_A_OTIME,
BTRFS_SEND_A_XATTR_NAME,
BTRFS_SEND_A_XATTR_DATA,
BTRFS_SEND_A_PATH,
BTRFS_SEND_A_PATH_TO,
BTRFS_SEND_A_PATH_LINK,
BTRFS_SEND_A_FILE_OFFSET,
BTRFS_SEND_A_DATA,
BTRFS_SEND_A_CLONE_UUID,
BTRFS_SEND_A_CLONE_CTRANSID,
BTRFS_SEND_A_CLONE_PATH,
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
__BTRFS_SEND_A_MAX,
};
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
#endif

142
fs/btrfs/struct-funcs.c Normal file
View file

@ -0,0 +1,142 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/highmem.h>
#include <asm/unaligned.h>
#include "ctree.h"
static inline u8 get_unaligned_le8(const void *p)
{
return *(u8 *)p;
}
static inline void put_unaligned_le8(u8 val, void *p)
{
*(u8 *)p = val;
}
/*
* this is some deeply nasty code.
*
* The end result is that anyone who #includes ctree.h gets a
* declaration for the btrfs_set_foo functions and btrfs_foo functions,
* which are wappers of btrfs_set_token_#bits functions and
* btrfs_get_token_#bits functions, which are defined in this file.
*
* These setget functions do all the extent_buffer related mapping
* required to efficiently read and write specific fields in the extent
* buffers. Every pointer to metadata items in btrfs is really just
* an unsigned long offset into the extent buffer which has been
* cast to a specific type. This gives us all the gcc type checking.
*
* The extent buffer api is used to do the page spanning work required to
* have a metadata blocksize different from the page size.
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
u##bits res; \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
res = get_unaligned_le##bits(p + off); \
return res; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
\
read_extent_buffer(eb, &leres, offset, size); \
return le##bits##_to_cpu(leres); \
} \
p = kaddr + part_offset - map_start; \
res = get_unaligned_le##bits(p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
return res; \
} \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
void *ptr, unsigned long off, u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
put_unaligned_le##bits(val, p + off); \
return; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
\
val2 = cpu_to_le##bits(val); \
write_extent_buffer(eb, &val2, offset, size); \
return; \
} \
p = kaddr + part_offset - map_start; \
put_unaligned_le##bits(val, p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
}
DEFINE_BTRFS_SETGET_BITS(8)
DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
struct btrfs_key_ptr, key, disk_key);
}

2165
fs/btrfs/super.c Normal file

File diff suppressed because it is too large Load diff

758
fs/btrfs/sysfs.c Normal file
View file

@ -0,0 +1,758 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/kobject.h>
#include <linux/bug.h>
#include <linux/genhd.h>
#include <linux/debugfs.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "sysfs.h"
#include "volumes.h"
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
static u64 get_features(struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set)
{
struct btrfs_super_block *disk_super = fs_info->super_copy;
if (set == FEAT_COMPAT)
return btrfs_super_compat_flags(disk_super);
else if (set == FEAT_COMPAT_RO)
return btrfs_super_compat_ro_flags(disk_super);
else
return btrfs_super_incompat_flags(disk_super);
}
static void set_features(struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set, u64 features)
{
struct btrfs_super_block *disk_super = fs_info->super_copy;
if (set == FEAT_COMPAT)
btrfs_set_super_compat_flags(disk_super, features);
else if (set == FEAT_COMPAT_RO)
btrfs_set_super_compat_ro_flags(disk_super, features);
else
btrfs_set_super_incompat_flags(disk_super, features);
}
static int can_modify_feature(struct btrfs_feature_attr *fa)
{
int val = 0;
u64 set, clear;
switch (fa->feature_set) {
case FEAT_COMPAT:
set = BTRFS_FEATURE_COMPAT_SAFE_SET;
clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
break;
case FEAT_COMPAT_RO:
set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
break;
case FEAT_INCOMPAT:
set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
break;
default:
printk(KERN_WARNING "btrfs: sysfs: unknown feature set %d\n",
fa->feature_set);
return 0;
}
if (set & fa->feature_bit)
val |= 1;
if (clear & fa->feature_bit)
val |= 2;
return val;
}
static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
int val = 0;
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
if (fs_info) {
u64 features = get_features(fs_info, fa->feature_set);
if (features & fa->feature_bit)
val = 1;
} else
val = can_modify_feature(fa);
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
struct kobj_attribute *a,
const char *buf, size_t count)
{
struct btrfs_fs_info *fs_info;
struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
struct btrfs_trans_handle *trans;
u64 features, set, clear;
unsigned long val;
int ret;
fs_info = to_fs_info(kobj);
if (!fs_info)
return -EPERM;
ret = kstrtoul(skip_spaces(buf), 0, &val);
if (ret)
return ret;
if (fa->feature_set == FEAT_COMPAT) {
set = BTRFS_FEATURE_COMPAT_SAFE_SET;
clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
} else if (fa->feature_set == FEAT_COMPAT_RO) {
set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
} else {
set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
}
features = get_features(fs_info, fa->feature_set);
/* Nothing to do */
if ((val && (features & fa->feature_bit)) ||
(!val && !(features & fa->feature_bit)))
return count;
if ((val && !(set & fa->feature_bit)) ||
(!val && !(clear & fa->feature_bit))) {
btrfs_info(fs_info,
"%sabling feature %s on mounted fs is not supported.",
val ? "En" : "Dis", fa->kobj_attr.attr.name);
return -EPERM;
}
btrfs_info(fs_info, "%s %s feature flag",
val ? "Setting" : "Clearing", fa->kobj_attr.attr.name);
trans = btrfs_start_transaction(fs_info->fs_root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
spin_lock(&fs_info->super_lock);
features = get_features(fs_info, fa->feature_set);
if (val)
features |= fa->feature_bit;
else
features &= ~fa->feature_bit;
set_features(fs_info, fa->feature_set, features);
spin_unlock(&fs_info->super_lock);
ret = btrfs_commit_transaction(trans, fs_info->fs_root);
if (ret)
return ret;
return count;
}
static umode_t btrfs_feature_visible(struct kobject *kobj,
struct attribute *attr, int unused)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
umode_t mode = attr->mode;
if (fs_info) {
struct btrfs_feature_attr *fa;
u64 features;
fa = attr_to_btrfs_feature_attr(attr);
features = get_features(fs_info, fa->feature_set);
if (can_modify_feature(fa))
mode |= S_IWUSR;
else if (!(features & fa->feature_bit))
mode = 0;
}
return mode;
}
BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
BTRFS_FEAT_ATTR_PTR(default_subvol),
BTRFS_FEAT_ATTR_PTR(mixed_groups),
BTRFS_FEAT_ATTR_PTR(compress_lzo),
BTRFS_FEAT_ATTR_PTR(big_metadata),
BTRFS_FEAT_ATTR_PTR(extended_iref),
BTRFS_FEAT_ATTR_PTR(raid56),
BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes),
NULL
};
static const struct attribute_group btrfs_feature_attr_group = {
.name = "features",
.is_visible = btrfs_feature_visible,
.attrs = btrfs_supported_feature_attrs,
};
static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
{
u64 val;
if (lock)
spin_lock(lock);
val = *value_ptr;
if (lock)
spin_unlock(lock);
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
}
static ssize_t global_rsv_size_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
}
BTRFS_ATTR(global_rsv_size, global_rsv_size_show);
static ssize_t global_rsv_reserved_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
}
BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show);
#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
static ssize_t raid_bytes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
static ssize_t raid_bytes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
struct btrfs_block_group_cache *block_group;
int index = to_raid_kobj(kobj)->raid_type;
u64 val = 0;
down_read(&sinfo->groups_sem);
list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
val += block_group->key.offset;
else
val += btrfs_block_group_used(&block_group->item);
}
up_read(&sinfo->groups_sem);
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
}
static struct attribute *raid_attributes[] = {
BTRFS_RAID_ATTR_PTR(total_bytes),
BTRFS_RAID_ATTR_PTR(used_bytes),
NULL
};
static void release_raid_kobj(struct kobject *kobj)
{
kfree(to_raid_kobj(kobj));
}
struct kobj_type btrfs_raid_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = release_raid_kobj,
.default_attrs = raid_attributes,
};
#define SPACE_INFO_ATTR(field) \
static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \
struct kobj_attribute *a, \
char *buf) \
{ \
struct btrfs_space_info *sinfo = to_space_info(kobj); \
return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \
} \
BTRFS_ATTR(field, btrfs_space_info_show_##field)
static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
struct kobj_attribute *a,
char *buf)
{
struct btrfs_space_info *sinfo = to_space_info(kobj);
s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
return snprintf(buf, PAGE_SIZE, "%lld\n", val);
}
SPACE_INFO_ATTR(flags);
SPACE_INFO_ATTR(total_bytes);
SPACE_INFO_ATTR(bytes_used);
SPACE_INFO_ATTR(bytes_pinned);
SPACE_INFO_ATTR(bytes_reserved);
SPACE_INFO_ATTR(bytes_may_use);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned);
static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(flags),
BTRFS_ATTR_PTR(total_bytes),
BTRFS_ATTR_PTR(bytes_used),
BTRFS_ATTR_PTR(bytes_pinned),
BTRFS_ATTR_PTR(bytes_reserved),
BTRFS_ATTR_PTR(bytes_may_use),
BTRFS_ATTR_PTR(disk_used),
BTRFS_ATTR_PTR(disk_total),
BTRFS_ATTR_PTR(total_bytes_pinned),
NULL,
};
static void space_info_release(struct kobject *kobj)
{
struct btrfs_space_info *sinfo = to_space_info(kobj);
percpu_counter_destroy(&sinfo->total_bytes_pinned);
kfree(sinfo);
}
struct kobj_type space_info_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = space_info_release,
.default_attrs = space_info_attrs,
};
static const struct attribute *allocation_attrs[] = {
BTRFS_ATTR_PTR(global_rsv_reserved),
BTRFS_ATTR_PTR(global_rsv_size),
NULL,
};
static ssize_t btrfs_label_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
char *label = fs_info->super_copy->label;
return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
}
static ssize_t btrfs_label_store(struct kobject *kobj,
struct kobj_attribute *a,
const char *buf, size_t len)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = fs_info->fs_root;
int ret;
size_t p_len;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
/*
* p_len is the len until the first occurrence of either
* '\n' or '\0'
*/
p_len = strcspn(buf, "\n");
if (p_len >= BTRFS_LABEL_SIZE)
return -EINVAL;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
spin_lock(&root->fs_info->super_lock);
memset(fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
memcpy(fs_info->super_copy->label, buf, p_len);
spin_unlock(&root->fs_info->super_lock);
ret = btrfs_commit_transaction(trans, root);
if (!ret)
return len;
return ret;
}
BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store);
static ssize_t btrfs_nodesize_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
}
BTRFS_ATTR(nodesize, btrfs_nodesize_show);
static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(sectorsize, btrfs_sectorsize_show);
static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
static struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(label),
BTRFS_ATTR_PTR(nodesize),
BTRFS_ATTR_PTR(sectorsize),
BTRFS_ATTR_PTR(clone_alignment),
NULL,
};
static void btrfs_release_super_kobj(struct kobject *kobj)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
complete(&fs_info->kobj_unregister);
}
static struct kobj_type btrfs_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = btrfs_release_super_kobj,
.default_attrs = btrfs_attrs,
};
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
{
if (kobj->ktype != &btrfs_ktype)
return NULL;
return container_of(kobj, struct btrfs_fs_info, super_kobj);
}
#define NUM_FEATURE_BITS 64
static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
static u64 supported_feature_masks[3] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
[FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
};
static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
{
int set;
for (set = 0; set < FEAT_MAX; set++) {
int i;
struct attribute *attrs[2];
struct attribute_group agroup = {
.name = "features",
.attrs = attrs,
};
u64 features = get_features(fs_info, set);
features &= ~supported_feature_masks[set];
if (!features)
continue;
attrs[1] = NULL;
for (i = 0; i < NUM_FEATURE_BITS; i++) {
struct btrfs_feature_attr *fa;
if (!(features & (1ULL << i)))
continue;
fa = &btrfs_feature_attrs[set][i];
attrs[0] = &fa->kobj_attr.attr;
if (add) {
int ret;
ret = sysfs_merge_group(&fs_info->super_kobj,
&agroup);
if (ret)
return ret;
} else
sysfs_unmerge_group(&fs_info->super_kobj,
&agroup);
}
}
return 0;
}
static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
{
kobject_del(&fs_info->super_kobj);
kobject_put(&fs_info->super_kobj);
wait_for_completion(&fs_info->kobj_unregister);
}
void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
{
if (fs_info->space_info_kobj) {
sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
kobject_del(fs_info->space_info_kobj);
kobject_put(fs_info->space_info_kobj);
}
kobject_del(fs_info->device_dir_kobj);
kobject_put(fs_info->device_dir_kobj);
addrm_unknown_feature_attrs(fs_info, false);
sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
__btrfs_sysfs_remove_one(fs_info);
}
const char * const btrfs_feature_set_names[3] = {
[FEAT_COMPAT] = "compat",
[FEAT_COMPAT_RO] = "compat_ro",
[FEAT_INCOMPAT] = "incompat",
};
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags)
{
size_t bufsize = 4096; /* safe max, 64 names * 64 bytes */
int len = 0;
int i;
char *str;
str = kmalloc(bufsize, GFP_KERNEL);
if (!str)
return str;
for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
const char *name;
if (!(flags & (1ULL << i)))
continue;
name = btrfs_feature_attrs[set][i].kobj_attr.attr.name;
len += snprintf(str + len, bufsize - len, "%s%s",
len ? "," : "", name);
}
return str;
}
static void init_feature_attrs(void)
{
struct btrfs_feature_attr *fa;
int set, i;
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
ARRAY_SIZE(btrfs_feature_attrs));
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
ARRAY_SIZE(btrfs_feature_attrs[0]));
memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
memset(btrfs_unknown_feature_names, 0,
sizeof(btrfs_unknown_feature_names));
for (i = 0; btrfs_supported_feature_attrs[i]; i++) {
struct btrfs_feature_attr *sfa;
struct attribute *a = btrfs_supported_feature_attrs[i];
int bit;
sfa = attr_to_btrfs_feature_attr(a);
bit = ilog2(sfa->feature_bit);
fa = &btrfs_feature_attrs[sfa->feature_set][bit];
fa->kobj_attr.attr.name = sfa->kobj_attr.attr.name;
}
for (set = 0; set < FEAT_MAX; set++) {
for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
char *name = btrfs_unknown_feature_names[set][i];
fa = &btrfs_feature_attrs[set][i];
if (fa->kobj_attr.attr.name)
continue;
snprintf(name, 13, "%s:%u",
btrfs_feature_set_names[set], i);
fa->kobj_attr.attr.name = name;
fa->kobj_attr.attr.mode = S_IRUGO;
fa->feature_set = set;
fa->feature_bit = 1ULL << i;
}
}
}
int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *one_device)
{
struct hd_struct *disk;
struct kobject *disk_kobj;
if (!fs_info->device_dir_kobj)
return -EINVAL;
if (one_device && one_device->bdev) {
disk = one_device->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
sysfs_remove_link(fs_info->device_dir_kobj,
disk_kobj->name);
}
return 0;
}
int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *one_device)
{
int error = 0;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *dev;
if (!fs_info->device_dir_kobj)
fs_info->device_dir_kobj = kobject_create_and_add("devices",
&fs_info->super_kobj);
if (!fs_info->device_dir_kobj)
return -ENOMEM;
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
struct hd_struct *disk;
struct kobject *disk_kobj;
if (!dev->bdev)
continue;
if (one_device && one_device != dev)
continue;
disk = dev->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
error = sysfs_create_link(fs_info->device_dir_kobj,
disk_kobj, disk_kobj->name);
if (error)
break;
}
return error;
}
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;
/* /sys/kernel/debug/btrfs */
static struct dentry *btrfs_debugfs_root_dentry;
/* Debugging tunables and exported data */
u64 btrfs_debugfs_test;
int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
{
int error;
init_completion(&fs_info->kobj_unregister);
fs_info->super_kobj.kset = btrfs_kset;
error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
"%pU", fs_info->fsid);
if (error)
return error;
error = sysfs_create_group(&fs_info->super_kobj,
&btrfs_feature_attr_group);
if (error) {
__btrfs_sysfs_remove_one(fs_info);
return error;
}
error = addrm_unknown_feature_attrs(fs_info, true);
if (error)
goto failure;
error = btrfs_kobj_add_device(fs_info, NULL);
if (error)
goto failure;
fs_info->space_info_kobj = kobject_create_and_add("allocation",
&fs_info->super_kobj);
if (!fs_info->space_info_kobj) {
error = -ENOMEM;
goto failure;
}
error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
if (error)
goto failure;
return 0;
failure:
btrfs_sysfs_remove_one(fs_info);
return error;
}
static int btrfs_init_debugfs(void)
{
#ifdef CONFIG_DEBUG_FS
btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL);
if (!btrfs_debugfs_root_dentry)
return -ENOMEM;
debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry,
&btrfs_debugfs_test);
#endif
return 0;
}
int btrfs_init_sysfs(void)
{
int ret;
btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
if (!btrfs_kset)
return -ENOMEM;
ret = btrfs_init_debugfs();
if (ret)
return ret;
init_feature_attrs();
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
return ret;
}
void btrfs_exit_sysfs(void)
{
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
kset_unregister(btrfs_kset);
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
}

77
fs/btrfs/sysfs.h Normal file
View file

@ -0,0 +1,77 @@
#ifndef _BTRFS_SYSFS_H_
#define _BTRFS_SYSFS_H_
/*
* Data exported through sysfs
*/
extern u64 btrfs_debugfs_test;
enum btrfs_feature_set {
FEAT_COMPAT,
FEAT_COMPAT_RO,
FEAT_INCOMPAT,
FEAT_MAX
};
#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store) \
{ \
.attr = { .name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
}
#define BTRFS_ATTR_RW(_name, _show, _store) \
static struct kobj_attribute btrfs_attr_##_name = \
__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
#define BTRFS_ATTR(_name, _show) \
static struct kobj_attribute btrfs_attr_##_name = \
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
#define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr)
#define BTRFS_RAID_ATTR(_name, _show) \
static struct kobj_attribute btrfs_raid_attr_##_name = \
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
#define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr)
struct btrfs_feature_attr {
struct kobj_attribute kobj_attr;
enum btrfs_feature_set feature_set;
u64 feature_bit;
};
#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \
static struct btrfs_feature_attr btrfs_attr_##_name = { \
.kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \
btrfs_feature_attr_show, \
btrfs_feature_attr_store), \
.feature_set = _feature_set, \
.feature_bit = _prefix ##_## _feature_bit, \
}
#define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr)
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
/* convert from attribute */
#define to_btrfs_feature_attr(a) \
container_of(a, struct btrfs_feature_attr, kobj_attr)
#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
#define attr_to_btrfs_feature_attr(a) \
to_btrfs_feature_attr(attr_to_btrfs_attr(a))
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *one_device);
int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *one_device);
#endif /* _BTRFS_SYSFS_H_ */

View file

@ -0,0 +1,171 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../volumes.h"
#include "../disk-io.h"
#include "../qgroup.h"
static struct vfsmount *test_mnt = NULL;
static const struct super_operations btrfs_test_super_ops = {
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_test_destroy_inode,
};
static struct dentry *btrfs_test_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops,
NULL, BTRFS_TEST_MAGIC);
}
static struct file_system_type test_type = {
.name = "btrfs_test_fs",
.mount = btrfs_test_mount,
.kill_sb = kill_anon_super,
};
struct inode *btrfs_new_test_inode(void)
{
return new_inode(test_mnt->mnt_sb);
}
int btrfs_init_test_fs(void)
{
int ret;
ret = register_filesystem(&test_type);
if (ret) {
printk(KERN_ERR "btrfs: cannot register test file system\n");
return ret;
}
test_mnt = kern_mount(&test_type);
if (IS_ERR(test_mnt)) {
printk(KERN_ERR "btrfs: cannot mount test file system\n");
unregister_filesystem(&test_type);
return ret;
}
return 0;
}
void btrfs_destroy_test_fs(void)
{
kern_unmount(test_mnt);
unregister_filesystem(&test_type);
}
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
{
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
GFP_NOFS);
if (!fs_info)
return fs_info;
fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
GFP_NOFS);
if (!fs_info->fs_devices) {
kfree(fs_info);
return NULL;
}
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
GFP_NOFS);
if (!fs_info->super_copy) {
kfree(fs_info->fs_devices);
kfree(fs_info);
return NULL;
}
if (init_srcu_struct(&fs_info->subvol_srcu)) {
kfree(fs_info->fs_devices);
kfree(fs_info->super_copy);
kfree(fs_info);
return NULL;
}
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->qgroup_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
mutex_init(&fs_info->qgroup_rescan_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
fs_info->running_transaction = NULL;
fs_info->qgroup_tree = RB_ROOT;
fs_info->qgroup_ulist = NULL;
atomic64_set(&fs_info->tree_mod_seq, 0);
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
return fs_info;
}
static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
struct radix_tree_iter iter;
void **slot;
spin_lock(&fs_info->buffer_lock);
restart:
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
struct extent_buffer *eb;
eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
if (!eb)
continue;
/* Shouldn't happen but that kind of thinking creates CVE's */
if (radix_tree_exception(eb)) {
if (radix_tree_deref_retry(eb))
goto restart;
continue;
}
spin_unlock(&fs_info->buffer_lock);
free_extent_buffer_stale(eb);
spin_lock(&fs_info->buffer_lock);
}
spin_unlock(&fs_info->buffer_lock);
btrfs_free_qgroup_config(fs_info);
btrfs_free_fs_roots(fs_info);
cleanup_srcu_struct(&fs_info->subvol_srcu);
kfree(fs_info->super_copy);
kfree(fs_info->fs_devices);
kfree(fs_info);
}
void btrfs_free_dummy_root(struct btrfs_root *root)
{
if (!root)
return;
if (root->node)
free_extent_buffer(root->node);
if (root->fs_info)
btrfs_free_dummy_fs_info(root->fs_info);
kfree(root);
}

View file

@ -0,0 +1,68 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_TESTS
#define __BTRFS_TESTS
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
struct btrfs_root;
int btrfs_test_free_space_cache(void);
int btrfs_test_extent_buffer_operations(void);
int btrfs_test_extent_io(void);
int btrfs_test_inodes(void);
int btrfs_test_qgroups(void);
int btrfs_init_test_fs(void);
void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
void btrfs_free_dummy_root(struct btrfs_root *root);
#else
static inline int btrfs_test_free_space_cache(void)
{
return 0;
}
static inline int btrfs_test_extent_buffer_operations(void)
{
return 0;
}
static inline int btrfs_init_test_fs(void)
{
return 0;
}
static inline void btrfs_destroy_test_fs(void)
{
}
static inline int btrfs_test_extent_io(void)
{
return 0;
}
static inline int btrfs_test_inodes(void)
{
return 0;
}
static inline int btrfs_test_qgroups(void)
{
return 0;
}
#endif
#endif

View file

@ -0,0 +1,229 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/slab.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../extent_io.h"
#include "../disk-io.h"
static int test_btrfs_split_item(void)
{
struct btrfs_path *path;
struct btrfs_root *root;
struct extent_buffer *eb;
struct btrfs_item *item;
char *value = "mary had a little lamb";
char *split1 = "mary had a little";
char *split2 = " lamb";
char *split3 = "mary";
char *split4 = " had a little";
char buf[32];
struct btrfs_key key;
u32 value_len = strlen(value);
int ret = 0;
test_msg("Running btrfs_split_item tests\n");
root = btrfs_alloc_dummy_root();
if (IS_ERR(root)) {
test_msg("Could not allocate root\n");
return PTR_ERR(root);
}
path = btrfs_alloc_path();
if (!path) {
test_msg("Could not allocate path\n");
kfree(root);
return -ENOMEM;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
if (!eb) {
test_msg("Could not allocate dummy buffer\n");
ret = -ENOMEM;
goto out;
}
path->slots[0] = 0;
key.objectid = 0;
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = 0;
setup_items_for_insert(root, path, &key, &value_len, value_len,
value_len + sizeof(struct btrfs_item), 1);
item = btrfs_item_nr(0);
write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
value_len);
key.offset = 3;
/*
* Passing NULL trans here should be safe because we have plenty of
* space in this leaf to split the item without having to split the
* leaf.
*/
ret = btrfs_split_item(NULL, root, path, &key, 17);
if (ret) {
test_msg("Split item failed %d\n", ret);
goto out;
}
/*
* Read the first slot, it should have the original key and contain only
* 'mary had a little'
*/
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
test_msg("Invalid key at slot 0\n");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split1)) {
test_msg("Invalid len in the first split\n");
ret = -EINVAL;
goto out;
}
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split1));
if (memcmp(buf, split1, strlen(split1))) {
test_msg("Data in the buffer doesn't match what it should "
"in the first split have='%.*s' want '%s'\n",
(int)strlen(split1), buf, split1);
ret = -EINVAL;
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
test_msg("Invalid key at slot 1\n");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split2)) {
test_msg("Invalid len in the second split\n");
ret = -EINVAL;
goto out;
}
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
test_msg("Data in the buffer doesn't match what it should "
"in the second split\n");
ret = -EINVAL;
goto out;
}
key.offset = 1;
/* Do it again so we test memmoving the other items in the leaf */
ret = btrfs_split_item(NULL, root, path, &key, 4);
if (ret) {
test_msg("Second split item failed %d\n", ret);
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
test_msg("Invalid key at slot 0\n");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split3)) {
test_msg("Invalid len in the first split\n");
ret = -EINVAL;
goto out;
}
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split3));
if (memcmp(buf, split3, strlen(split3))) {
test_msg("Data in the buffer doesn't match what it should "
"in the third split");
ret = -EINVAL;
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 1) {
test_msg("Invalid key at slot 1\n");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split4)) {
test_msg("Invalid len in the second split\n");
ret = -EINVAL;
goto out;
}
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split4));
if (memcmp(buf, split4, strlen(split4))) {
test_msg("Data in the buffer doesn't match what it should "
"in the fourth split\n");
ret = -EINVAL;
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 2);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
test_msg("Invalid key at slot 2\n");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(2);
if (btrfs_item_size(eb, item) != strlen(split2)) {
test_msg("Invalid len in the second split\n");
ret = -EINVAL;
goto out;
}
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
test_msg("Data in the buffer doesn't match what it should "
"in the last chunk\n");
ret = -EINVAL;
goto out;
}
out:
btrfs_free_path(path);
kfree(root);
return ret;
}
int btrfs_test_extent_buffer_operations(void)
{
test_msg("Running extent buffer operation tests");
return test_btrfs_split_item();
}

View file

@ -0,0 +1,276 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/pagemap.h>
#include <linux/sched.h>
#include "btrfs-tests.h"
#include "../extent_io.h"
#define PROCESS_UNLOCK (1 << 0)
#define PROCESS_RELEASE (1 << 1)
#define PROCESS_TEST_LOCKED (1 << 2)
static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
{
int ret;
struct page *pages[16];
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
int count = 0;
int loops = 0;
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
min_t(unsigned long, nr_pages,
ARRAY_SIZE(pages)), pages);
for (i = 0; i < ret; i++) {
if (flags & PROCESS_TEST_LOCKED &&
!PageLocked(pages[i]))
count++;
if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
unlock_page(pages[i]);
page_cache_release(pages[i]);
if (flags & PROCESS_RELEASE)
page_cache_release(pages[i]);
}
nr_pages -= ret;
index += ret;
cond_resched();
loops++;
if (loops > 100000) {
printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
break;
}
}
return count;
}
static int test_find_delalloc(void)
{
struct inode *inode;
struct extent_io_tree tmp;
struct page *page;
struct page *locked_page = NULL;
unsigned long index = 0;
u64 total_dirty = 256 * 1024 * 1024;
u64 max_bytes = 128 * 1024 * 1024;
u64 start, end, test_start;
u64 found;
int ret = -EINVAL;
inode = btrfs_new_test_inode();
if (!inode) {
test_msg("Failed to allocate test inode\n");
return -ENOMEM;
}
extent_io_tree_init(&tmp, &inode->i_data);
/*
* First go through and create and mark all of our pages dirty, we pin
* everything to make sure our pages don't get evicted and screw up our
* test.
*/
for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
test_msg("Failed to allocate test page\n");
ret = -ENOMEM;
goto out;
}
SetPageDirty(page);
if (index) {
unlock_page(page);
} else {
page_cache_get(page);
locked_page = page;
}
}
/* Test this scenario
* |--- delalloc ---|
* |--- search ---|
*/
set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
test_msg("Should have found at least one delalloc\n");
goto out_bits;
}
if (start != 0 || end != 4095) {
test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
start, end);
goto out_bits;
}
unlock_extent(&tmp, start, end);
unlock_page(locked_page);
page_cache_release(locked_page);
/*
* Test this scenario
*
* |--- delalloc ---|
* |--- search ---|
*/
test_start = 64 * 1024 * 1024;
locked_page = find_lock_page(inode->i_mapping,
test_start >> PAGE_CACHE_SHIFT);
if (!locked_page) {
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
test_msg("Couldn't find delalloc in our range\n");
goto out_bits;
}
if (start != test_start || end != max_bytes - 1) {
test_msg("Expected start %Lu end %Lu, got start %Lu, end "
"%Lu\n", test_start, max_bytes - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
test_msg("There were unlocked pages in the range\n");
goto out_bits;
}
unlock_extent(&tmp, start, end);
/* locked_page was unlocked above */
page_cache_release(locked_page);
/*
* Test this scenario
* |--- delalloc ---|
* |--- search ---|
*/
test_start = max_bytes + 4096;
locked_page = find_lock_page(inode->i_mapping, test_start >>
PAGE_CACHE_SHIFT);
if (!locked_page) {
test_msg("Could'nt find the locked page\n");
goto out_bits;
}
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (found) {
test_msg("Found range when we shouldn't have\n");
goto out_bits;
}
if (end != (u64)-1) {
test_msg("Did not return the proper end offset\n");
goto out_bits;
}
/*
* Test this scenario
* [------- delalloc -------|
* [max_bytes]|-- search--|
*
* We are re-using our test_start from above since it works out well.
*/
set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
test_msg("Didn't find our range\n");
goto out_bits;
}
if (start != test_start || end != total_dirty - 1) {
test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
test_start, total_dirty - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
test_msg("Pages in range were not all locked\n");
goto out_bits;
}
unlock_extent(&tmp, start, end);
/*
* Now to test where we run into a page that is no longer dirty in the
* range we want to find.
*/
page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
>> PAGE_CACHE_SHIFT);
if (!page) {
test_msg("Couldn't find our page\n");
goto out_bits;
}
ClearPageDirty(page);
page_cache_release(page);
/* We unlocked it in the previous test */
lock_page(locked_page);
start = test_start;
end = 0;
/*
* Currently if we fail to find dirty pages in the delalloc range we
* will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
* this changes at any point in the future we will need to fix this
* tests expected behavior.
*/
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
test_msg("Didn't find our range\n");
goto out_bits;
}
if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
test_start, test_start + PAGE_CACHE_SIZE - 1, start,
end);
goto out_bits;
}
if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
PROCESS_UNLOCK)) {
test_msg("Pages in range were not all locked\n");
goto out_bits;
}
ret = 0;
out_bits:
clear_extent_bits(&tmp, 0, total_dirty - 1,
(unsigned long)-1, GFP_NOFS);
out:
if (locked_page)
page_cache_release(locked_page);
process_page_range(inode, 0, total_dirty - 1,
PROCESS_UNLOCK | PROCESS_RELEASE);
iput(inode);
return ret;
}
int btrfs_test_extent_io(void)
{
test_msg("Running find delalloc tests\n");
return test_find_delalloc();
}

View file

@ -0,0 +1,909 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/slab.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../free-space-cache.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
static struct btrfs_block_group_cache *init_test_block_group(void)
{
struct btrfs_block_group_cache *cache;
cache = kzalloc(sizeof(*cache), GFP_NOFS);
if (!cache)
return NULL;
cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
GFP_NOFS);
if (!cache->free_space_ctl) {
kfree(cache);
return NULL;
}
cache->key.objectid = 0;
cache->key.offset = 1024 * 1024 * 1024;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
cache->sectorsize = 4096;
cache->full_stripe_len = 4096;
spin_lock_init(&cache->lock);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->bg_list);
btrfs_init_free_space_ctl(cache);
return cache;
}
/*
* This test just does basic sanity checking, making sure we can add an exten
* entry and remove space from either end and the middle, and make sure we can
* remove space that covers adjacent extent entries.
*/
static int test_extents(struct btrfs_block_group_cache *cache)
{
int ret = 0;
test_msg("Running extent only tests\n");
/* First just make sure we can remove an entire entry */
ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
if (ret) {
test_msg("Error adding initial extents %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
if (ret) {
test_msg("Error removing extent %d\n", ret);
return ret;
}
if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
test_msg("Full remove left some lingering space\n");
return -1;
}
/* Ok edge and middle cases now */
ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
if (ret) {
test_msg("Error adding half extent %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
if (ret) {
test_msg("Error removing tail end %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
if (ret) {
test_msg("Error removing front end %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
if (ret) {
test_msg("Error removing middle piece %d\n", ret);
return ret;
}
if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
test_msg("Still have space at the front\n");
return -1;
}
if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
test_msg("Still have space in the middle\n");
return -1;
}
if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
test_msg("Still have space at the end\n");
return -1;
}
/* Cleanup */
__btrfs_remove_free_space_cache(cache->free_space_ctl);
return 0;
}
static int test_bitmaps(struct btrfs_block_group_cache *cache)
{
u64 next_bitmap_offset;
int ret;
test_msg("Running bitmap only tests\n");
ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't create a bitmap entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
if (ret) {
test_msg("Error removing bitmap full range %d\n", ret);
return ret;
}
if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
test_msg("Left some space in bitmap\n");
return -1;
}
ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add to our bitmap entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
if (ret) {
test_msg("Couldn't remove middle chunk %d\n", ret);
return ret;
}
/*
* The first bitmap we have starts at offset 0 so the next one is just
* at the end of the first bitmap.
*/
next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
/* Test a bit straddling two bitmaps */
ret = test_add_free_space_entry(cache, next_bitmap_offset -
(2 * 1024 * 1024), 4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add space that straddles two bitmaps %d\n",
ret);
return ret;
}
ret = btrfs_remove_free_space(cache, next_bitmap_offset -
(1 * 1024 * 1024), 2 * 1024 * 1024);
if (ret) {
test_msg("Couldn't remove overlapping space %d\n", ret);
return ret;
}
if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
2 * 1024 * 1024)) {
test_msg("Left some space when removing overlapping\n");
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
return 0;
}
/* This is the high grade jackassery */
static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
{
u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
int ret;
test_msg("Running bitmap and extent tests\n");
/*
* First let's do something simple, an extent at the same offset as the
* bitmap, but the free space completely in the extent and then
* completely in the bitmap.
*/
ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't create bitmap entry %d\n", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
if (ret) {
test_msg("Couldn't remove extent entry %d\n", ret);
return ret;
}
if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
test_msg("Left remnants after our remove\n");
return -1;
}
/* Now to add back the extent entry and remove from the bitmap */
ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
if (ret) {
test_msg("Couldn't re-add extent entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
if (ret) {
test_msg("Couldn't remove from bitmap %d\n", ret);
return ret;
}
if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
test_msg("Left remnants in the bitmap\n");
return -1;
}
/*
* Ok so a little more evil, extent entry and bitmap at the same offset,
* removing an overlapping chunk.
*/
ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add to a bitmap %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
if (ret) {
test_msg("Couldn't remove overlapping space %d\n", ret);
return ret;
}
if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
test_msg("Left over pieces after removing overlapping\n");
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
/* Now with the extent entry offset into the bitmap */
ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add space to the bitmap %d\n", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent to the cache %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
if (ret) {
test_msg("Problem removing overlapping space %d\n", ret);
return ret;
}
if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
test_msg("Left something behind when removing space");
return -1;
}
/*
* This has blown up in the past, the extent entry starts before the
* bitmap entry, but we're trying to remove an offset that falls
* completely within the bitmap range and is in both the extent entry
* and the bitmap entry, looks like this
*
* [ extent ]
* [ bitmap ]
* [ del ]
*/
__btrfs_remove_free_space_cache(cache->free_space_ctl);
ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
4 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add bitmap %d\n", ret);
return ret;
}
ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
5 * 1024 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
5 * 1024 * 1024);
if (ret) {
test_msg("Failed to free our space %d\n", ret);
return ret;
}
if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
5 * 1024 * 1024)) {
test_msg("Left stuff over\n");
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
/*
* This blew up before, we have part of the free space in a bitmap and
* then the entirety of the rest of the space in an extent. This used
* to return -EAGAIN back from btrfs_remove_extent, make sure this
* doesn't happen.
*/
ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
if (ret) {
test_msg("Error removing bitmap and extent overlapping %d\n", ret);
return ret;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
return 0;
}
/* Used by test_steal_space_from_bitmap_to_extent(). */
static bool test_use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
return ctl->free_extents > 0;
}
/* Used by test_steal_space_from_bitmap_to_extent(). */
static int
check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache,
const int num_extents,
const int num_bitmaps)
{
if (cache->free_space_ctl->free_extents != num_extents) {
test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
cache->free_space_ctl->free_extents, num_extents);
return -EINVAL;
}
if (cache->free_space_ctl->total_bitmaps != num_bitmaps) {
test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
cache->free_space_ctl->total_bitmaps, num_bitmaps);
return -EINVAL;
}
return 0;
}
/* Used by test_steal_space_from_bitmap_to_extent(). */
static int check_cache_empty(struct btrfs_block_group_cache *cache)
{
u64 offset;
u64 max_extent_size;
/*
* Now lets confirm that there's absolutely no free space left to
* allocate.
*/
if (cache->free_space_ctl->free_space != 0) {
test_msg("Cache free space is not 0\n");
return -EINVAL;
}
/* And any allocation request, no matter how small, should fail now. */
offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0,
&max_extent_size);
if (offset != 0) {
test_msg("Space allocation did not fail, returned offset: %llu",
offset);
return -EINVAL;
}
/* And no extent nor bitmap entries in the cache anymore. */
return check_num_extents_and_bitmaps(cache, 0, 0);
}
/*
* Before we were able to steal free space from a bitmap entry to an extent
* entry, we could end up with 2 entries representing a contiguous free space.
* One would be an extent entry and the other a bitmap entry. Since in order
* to allocate space to a caller we use only 1 entry, we couldn't return that
* whole range to the caller if it was requested. This forced the caller to
* either assume ENOSPC or perform several smaller space allocations, which
* wasn't optimal as they could be spread all over the block group while under
* concurrency (extra overhead and fragmentation).
*
* This stealing approach is benefical, since we always prefer to allocate from
* extent entries, both for clustered and non-clustered allocation requests.
*/
static int
test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
{
int ret;
u64 offset;
u64 max_extent_size;
bool (*use_bitmap_op)(struct btrfs_free_space_ctl *,
struct btrfs_free_space *);
test_msg("Running space stealing from bitmap to extent\n");
/*
* For this test, we want to ensure we end up with an extent entry
* immediately adjacent to a bitmap entry, where the bitmap starts
* at an offset where the extent entry ends. We keep adding and
* removing free space to reach into this state, but to get there
* we need to reach a point where marking new free space doesn't
* result in adding new extent entries or merging the new space
* with existing extent entries - the space ends up being marked
* in an existing bitmap that covers the new free space range.
*
* To get there, we need to reach the threshold defined set at
* cache->free_space_ctl->extents_thresh, which currently is
* 256 extents on a x86_64 system at least, and a few other
* conditions (check free_space_cache.c). Instead of making the
* test much longer and complicated, use a "use_bitmap" operation
* that forces use of bitmaps as soon as we have at least 1
* extent entry.
*/
use_bitmap_op = cache->free_space_ctl->op->use_bitmap;
cache->free_space_ctl->op->use_bitmap = test_use_bitmap;
/*
* Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[
*/
ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 - 256 * 1024,
128 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
/* Bitmap entry covering free space range [128Mb + 512Kb, 256Mb[ */
ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 512 * 1024,
128 * 1024 * 1024 - 512 * 1024, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
}
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* Now make only the first 256Kb of the bitmap marked as free, so that
* we end up with only the following ranges marked as free space:
*
* [128Mb - 256Kb, 128Mb - 128Kb[
* [128Mb + 512Kb, 128Mb + 768Kb[
*/
ret = btrfs_remove_free_space(cache,
128 * 1024 * 1024 + 768 * 1024,
128 * 1024 * 1024 - 768 * 1024);
if (ret) {
test_msg("Failed to free part of bitmap space %d\n", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024,
128 * 1024)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
if (!test_check_exists(cache, 128 * 1024 * 1024 + 512 * 1024,
256 * 1024)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
/*
* Confirm that the bitmap range [128Mb + 768Kb, 256Mb[ isn't marked
* as free anymore.
*/
if (test_check_exists(cache, 128 * 1024 * 1024 + 768 * 1024,
128 * 1024 * 1024 - 768 * 1024)) {
test_msg("Bitmap region not removed from space cache\n");
return -EINVAL;
}
/*
* Confirm that the region [128Mb + 256Kb, 128Mb + 512Kb[, which is
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, 128 * 1024 * 1024 + 256 * 1024,
256 * 1024)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
/*
* Confirm that the region [128Mb, 128Mb + 256Kb[, which is covered
* by the bitmap too, isn't marked as free either.
*/
if (test_check_exists(cache, 128 * 1024 * 1024,
256 * 1024)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
/*
* Now lets mark the region [128Mb, 128Mb + 512Kb[ as free too. But,
* lets make sure the free space cache marks it as free in the bitmap,
* and doesn't insert a new extent entry to represent this region.
*/
ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 512 * 1024);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024, 512 * 1024)) {
test_msg("Bitmap region not marked as free\n");
return -ENOENT;
}
/*
* Confirm that no new extent entries or bitmap entries were added to
* the cache after adding that free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* Now lets add a small free space region to the right of the previous
* one, which is not contiguous with it and is part of the bitmap too.
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 + 16 * 1024 * 1024,
4096);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/*
* Confirm that no new extent entries or bitmap entries were added to
* the cache after adding that free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* Now mark the region [128Mb - 128Kb, 128Mb[ as free too. This will
* expand the range covered by the existing extent entry that represents
* the free space [128Mb - 256Kb, 128Mb - 128Kb[.
*/
ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 128 * 1024,
128 * 1024);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024 - 128 * 1024,
128 * 1024)) {
test_msg("Extent region not marked as free\n");
return -ENOENT;
}
/*
* Confirm that our extent entry didn't stole all free space from the
* bitmap, because of the small 4Kb free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* So now we have the range [128Mb - 256Kb, 128Mb + 768Kb[ as free
* space. Without stealing bitmap free space into extent entry space,
* we would have all this free space represented by 2 entries in the
* cache:
*
* extent entry covering range: [128Mb - 256Kb, 128Mb[
* bitmap entry covering range: [128Mb, 128Mb + 768Kb[
*
* Attempting to allocate the whole free space (1Mb) would fail, because
* we can't allocate from multiple entries.
* With the bitmap free space stealing, we get a single extent entry
* that represents the 1Mb free space, and therefore we're able to
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024,
1 * 1024 * 1024)) {
test_msg("Expected region not marked as free\n");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 4096)) {
test_msg("Cache free space is not 1Mb + 4Kb\n");
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
0, 1 * 1024 * 1024, 0,
&max_extent_size);
if (offset != (128 * 1024 * 1024 - 256 * 1024)) {
test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
}
/* All that remains is a 4Kb free space region in a bitmap. Confirm. */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
if (cache->free_space_ctl->free_space != 4096) {
test_msg("Cache free space is not 4Kb\n");
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
0, 4096, 0,
&max_extent_size);
if (offset != (128 * 1024 * 1024 + 16 * 1024 * 1024)) {
test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
}
ret = check_cache_empty(cache);
if (ret)
return ret;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
/*
* Now test a similar scenario, but where our extent entry is located
* to the right of the bitmap entry, so that we can check that stealing
* space from a bitmap to the front of an extent entry works.
*/
/*
* Extent entry covering free space range [128Mb + 128Kb, 128Mb + 256Kb[
*/
ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 128 * 1024,
128 * 1024, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
/* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */
ret = test_add_free_space_entry(cache, 0,
128 * 1024 * 1024 - 512 * 1024, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
}
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* Now make only the last 256Kb of the bitmap marked as free, so that
* we end up with only the following ranges marked as free space:
*
* [128Mb + 128b, 128Mb + 256Kb[
* [128Mb - 768Kb, 128Mb - 512Kb[
*/
ret = btrfs_remove_free_space(cache,
0,
128 * 1024 * 1024 - 768 * 1024);
if (ret) {
test_msg("Failed to free part of bitmap space %d\n", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024 + 128 * 1024,
128 * 1024)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024,
256 * 1024)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
/*
* Confirm that the bitmap range [0, 128Mb - 768Kb[ isn't marked
* as free anymore.
*/
if (test_check_exists(cache, 0,
128 * 1024 * 1024 - 768 * 1024)) {
test_msg("Bitmap region not removed from space cache\n");
return -EINVAL;
}
/*
* Confirm that the region [128Mb - 512Kb, 128Mb[, which is
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024,
512 * 1024)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
/*
* Now lets mark the region [128Mb - 512Kb, 128Mb[ as free too. But,
* lets make sure the free space cache marks it as free in the bitmap,
* and doesn't insert a new extent entry to represent this region.
*/
ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 512 * 1024,
512 * 1024);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024,
512 * 1024)) {
test_msg("Bitmap region not marked as free\n");
return -ENOENT;
}
/*
* Confirm that no new extent entries or bitmap entries were added to
* the cache after adding that free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* Now lets add a small free space region to the left of the previous
* one, which is not contiguous with it and is part of the bitmap too.
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
ret = btrfs_add_free_space(cache, 32 * 1024 * 1024, 8192);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/*
* Now mark the region [128Mb, 128Mb + 128Kb[ as free too. This will
* expand the range covered by the existing extent entry that represents
* the free space [128Mb + 128Kb, 128Mb + 256Kb[.
*/
ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 128 * 1024);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, 128 * 1024 * 1024, 128 * 1024)) {
test_msg("Extent region not marked as free\n");
return -ENOENT;
}
/*
* Confirm that our extent entry didn't stole all free space from the
* bitmap, because of the small 8Kb free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
return ret;
/*
* So now we have the range [128Mb - 768Kb, 128Mb + 256Kb[ as free
* space. Without stealing bitmap free space into extent entry space,
* we would have all this free space represented by 2 entries in the
* cache:
*
* extent entry covering range: [128Mb, 128Mb + 256Kb[
* bitmap entry covering range: [128Mb - 768Kb, 128Mb[
*
* Attempting to allocate the whole free space (1Mb) would fail, because
* we can't allocate from multiple entries.
* With the bitmap free space stealing, we get a single extent entry
* that represents the 1Mb free space, and therefore we're able to
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024,
1 * 1024 * 1024)) {
test_msg("Expected region not marked as free\n");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 8192)) {
test_msg("Cache free space is not 1Mb + 8Kb\n");
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
0, 1 * 1024 * 1024, 0,
&max_extent_size);
if (offset != (128 * 1024 * 1024 - 768 * 1024)) {
test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
}
/* All that remains is a 8Kb free space region in a bitmap. Confirm. */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
if (cache->free_space_ctl->free_space != 8192) {
test_msg("Cache free space is not 8Kb\n");
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
0, 8192, 0,
&max_extent_size);
if (offset != (32 * 1024 * 1024)) {
test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
}
ret = check_cache_empty(cache);
if (ret)
return ret;
cache->free_space_ctl->op->use_bitmap = use_bitmap_op;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
return 0;
}
int btrfs_test_free_space_cache(void)
{
struct btrfs_block_group_cache *cache;
int ret;
test_msg("Running btrfs free space cache tests\n");
cache = init_test_block_group();
if (!cache) {
test_msg("Couldn't run the tests\n");
return 0;
}
ret = test_extents(cache);
if (ret)
goto out;
ret = test_bitmaps(cache);
if (ret)
goto out;
ret = test_bitmaps_and_extents(cache);
if (ret)
goto out;
ret = test_steal_space_from_bitmap_to_extent(cache);
out:
__btrfs_remove_free_space_cache(cache->free_space_ctl);
kfree(cache->free_space_ctl);
kfree(cache);
test_msg("Free space cache tests finished\n");
return ret;
}

View file

@ -0,0 +1,928 @@
/*
* Copyright (C) 2013 Fusion IO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../btrfs_inode.h"
#include "../disk-io.h"
#include "../extent_io.h"
#include "../volumes.h"
static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
u64 ram_bytes, u64 offset, u64 disk_bytenr,
u64 disk_len, u32 type, u8 compression, int slot)
{
struct btrfs_path path;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf = root->node;
struct btrfs_key key;
u32 value_len = sizeof(struct btrfs_file_extent_item);
if (type == BTRFS_FILE_EXTENT_INLINE)
value_len += len;
memset(&path, 0, sizeof(path));
path.nodes[0] = leaf;
path.slots[0] = slot;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
setup_items_for_insert(root, &path, &key, &value_len, value_len,
value_len + sizeof(struct btrfs_item), 1);
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, fi, 1);
btrfs_set_file_extent_type(leaf, fi, type);
btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len);
btrfs_set_file_extent_offset(leaf, fi, offset);
btrfs_set_file_extent_num_bytes(leaf, fi, len);
btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
btrfs_set_file_extent_compression(leaf, fi, compression);
btrfs_set_file_extent_encryption(leaf, fi, 0);
btrfs_set_file_extent_other_encoding(leaf, fi, 0);
}
static void insert_inode_item_key(struct btrfs_root *root)
{
struct btrfs_path path;
struct extent_buffer *leaf = root->node;
struct btrfs_key key;
u32 value_len = 0;
memset(&path, 0, sizeof(path));
path.nodes[0] = leaf;
path.slots[0] = 0;
key.objectid = BTRFS_INODE_ITEM_KEY;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
setup_items_for_insert(root, &path, &key, &value_len, value_len,
value_len + sizeof(struct btrfs_item), 1);
}
/*
* Build the most complicated map of extents the earth has ever seen. We want
* this so we can test all of the corner cases of btrfs_get_extent. Here is a
* diagram of how the extents will look though this may not be possible we still
* want to make sure everything acts normally (the last number is not inclusive)
*
* [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
* [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
*
* [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
* [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
*
* [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
* [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
*
* [81920-86016]
* [ regular ]
*/
static void setup_file_extents(struct btrfs_root *root)
{
int slot = 0;
u64 disk_bytenr = 1 * 1024 * 1024;
u64 offset = 0;
/* First we want a hole */
insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
slot);
slot++;
offset += 5;
/*
* Now we want an inline extent, I don't think this is possible but hey
* why not? Also keep in mind if we have an inline extent it counts as
* the whole first page. If we were to expand it we would have to cow
* and we wouldn't have an inline extent anymore.
*/
insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
slot);
slot++;
offset = 4096;
/* Now another hole */
insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
slot);
slot++;
offset += 4;
/* Now for a regular extent */
insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
disk_bytenr += 4096;
offset += 4095;
/*
* Now for 3 extents that were split from a hole punch so we test
* offsets properly.
*/
insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
0, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
offset += 8192;
disk_bytenr += 16384;
/* Now for a unwritten prealloc extent */
insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
offset += 4096;
/*
* We want to jack up disk_bytenr a little more so the em stuff doesn't
* merge our records.
*/
disk_bytenr += 8192;
/*
* Now for a partially written prealloc extent, basically the same as
* the hole punch example above. Ram_bytes never changes when you mark
* extents written btw.
*/
insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
offset += 8192;
disk_bytenr += 16384;
/* Now a normal compressed extent */
insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
slot++;
offset += 8192;
/* No merges */
disk_bytenr += 8192;
/* Now a split compressed extent */
insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
offset += 4096;
insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
slot++;
offset += 8192;
disk_bytenr += 8192;
/* Now extents that have a hole but no hole extent */
insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
offset += 16384;
disk_bytenr += 4096;
insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
BTRFS_FILE_EXTENT_REG, 0, slot);
}
static unsigned long prealloc_only = 0;
static unsigned long compressed_only = 0;
static unsigned long vacancy_only = 0;
static noinline int test_btrfs_get_extent(void)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
u64 orig_start;
u64 disk_bytenr;
u64 offset;
int ret = -ENOMEM;
inode = btrfs_new_test_inode();
if (!inode) {
test_msg("Couldn't allocate inode\n");
return ret;
}
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
root = btrfs_alloc_dummy_root();
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
}
/*
* We do this since btrfs_get_extent wants to assign em->bdev to
* root->fs_info->fs_devices->latest_bdev.
*/
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info) {
test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
root->node = alloc_dummy_extent_buffer(0, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
}
/*
* We will just free a dummy node if it's ref count is 2 so we need an
* extra ref so our searches don't accidently release our page.
*/
extent_buffer_get(root->node);
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
ret = -EINVAL;
/* First with no extents */
BTRFS_I(inode)->root = root;
em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
if (IS_ERR(em)) {
em = NULL;
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
test_msg("Vacancy flag wasn't set properly\n");
goto out;
}
free_extent_map(em);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
/*
* All of the magic numbers are based on the mapping setup in
* setup_file_extents, so if you change anything there you need to
* update the comment and update the expected values below.
*/
setup_file_extents(root);
em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (em->start != 0 || em->len != 5) {
test_msg("Unexpected extent wanted start 0 len 5, got start "
"%llu len %llu\n", em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_INLINE) {
test_msg("Expected an inline, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4091) {
test_msg("Unexpected extent wanted start %llu len 1, got start "
"%llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
/*
* We don't test anything else for inline since it doesn't get set
* unless we have a page for it to write into. Maybe we should change
* this?
*/
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4) {
test_msg("Unexpected extent wanted start %llu len 4, got start "
"%llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* Regular extent */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4095) {
test_msg("Unexpected extent wanted start %llu len 4095, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* The next 3 are split extents */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
disk_bytenr = em->block_start;
orig_start = em->start;
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 8192) {
test_msg("Unexpected extent wanted start %llu len 8192, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
test_msg("Wrong orig offset, want %llu, have %llu\n",
orig_start, em->orig_start);
goto out;
}
disk_bytenr += (em->start - orig_start);
if (em->block_start != disk_bytenr) {
test_msg("Wrong block start, want %llu, have %llu\n",
disk_bytenr, em->block_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* Prealloc extent */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
disk_bytenr = em->block_start;
orig_start = em->start;
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_HOLE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
orig_start, em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
test_msg("Unexpected block start, wanted %llu, have %llu\n",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 8192) {
test_msg("Unexpected extent wanted start %llu len 8192, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
test_msg("Unexpected block start, wanted %llu, have %llu\n",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* Now for the compressed extent */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 8192) {
test_msg("Unexpected extent wanted start %llu len 8192, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
test_msg("Unexpected compress type, wanted %d, got %d\n",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* Split compressed extent */
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
test_msg("Unexpected compress type, wanted %d, got %d\n",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
disk_bytenr = em->block_start;
orig_start = em->start;
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != disk_bytenr) {
test_msg("Block start does not match, want %llu got %llu\n",
disk_bytenr, em->block_start);
goto out;
}
if (em->start != offset || em->len != 8192) {
test_msg("Unexpected extent wanted start %llu len 8192, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
test_msg("Wrong orig offset, want %llu, have %llu\n",
em->start, orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
test_msg("Unexpected compress type, wanted %d, got %d\n",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
/* A hole between regular extents but no hole extent */
em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole extent, got %llu\n", em->block_start);
goto out;
}
/*
* Currently we just return a length that we requested rather than the
* length of the actual hole, if this changes we'll have to change this
* test.
*/
if (em->start != offset || em->len != 12288) {
test_msg("Unexpected extent wanted start %llu len 12288, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
test_msg("Unexpected flags set, want %lu have %lu\n",
vacancy_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
offset = em->start + em->len;
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4096) {
test_msg("Unexpected extent wanted start %llu len 4096, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
goto out;
}
if (em->orig_start != em->start) {
test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
em->orig_start);
goto out;
}
ret = 0;
out:
if (!IS_ERR(em))
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
return ret;
}
static int test_hole_first(void)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
int ret = -ENOMEM;
inode = btrfs_new_test_inode();
if (!inode) {
test_msg("Couldn't allocate inode\n");
return ret;
}
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
root = btrfs_alloc_dummy_root();
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
}
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info) {
test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
root->node = alloc_dummy_extent_buffer(0, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
}
extent_buffer_get(root->node);
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
BTRFS_I(inode)->root = root;
ret = -EINVAL;
/*
* Need a blank inode item here just so we don't confuse
* btrfs_get_extent.
*/
insert_inode_item_key(root);
insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
BTRFS_FILE_EXTENT_REG, 0, 1);
em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (em->start != 0 || em->len != 4096) {
test_msg("Unexpected extent wanted start 0 len 4096, got start "
"%llu len %llu\n", em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
em->flags);
goto out;
}
free_extent_map(em);
em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
if (em->block_start != 4096) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
if (em->start != 4096 || em->len != 4096) {
test_msg("Unexpected extent wanted start 4096 len 4096, got "
"start %llu len %llu\n", em->start, em->len);
goto out;
}
if (em->flags != 0) {
test_msg("Unexpected flags set, wanted 0 got %lu\n",
em->flags);
goto out;
}
ret = 0;
out:
if (!IS_ERR(em))
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
return ret;
}
int btrfs_test_inodes(void)
{
int ret;
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n");
ret = test_btrfs_get_extent();
if (ret)
return ret;
test_msg("Running hole first btrfs_get_extent test\n");
return test_hole_first();
}

View file

@ -0,0 +1,470 @@
/*
* Copyright (C) 2013 Facebook. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../transaction.h"
#include "../disk-io.h"
#include "../qgroup.h"
static void init_dummy_trans(struct btrfs_trans_handle *trans)
{
memset(trans, 0, sizeof(*trans));
trans->transid = 1;
INIT_LIST_HEAD(&trans->qgroup_ref_list);
trans->type = __TRANS_DUMMY;
}
static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 parent, u64 root_objectid)
{
struct btrfs_trans_handle trans;
struct btrfs_extent_item *item;
struct btrfs_extent_inline_ref *iref;
struct btrfs_tree_block_info *block_info;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
int ret;
init_dummy_trans(&trans);
ins.objectid = bytenr;
ins.type = BTRFS_EXTENT_ITEM_KEY;
ins.offset = num_bytes;
path = btrfs_alloc_path();
if (!path) {
test_msg("Couldn't allocate path\n");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
if (ret) {
test_msg("Couldn't insert ref %d\n", ret);
btrfs_free_path(path);
return ret;
}
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
btrfs_set_extent_refs(leaf, item, 1);
btrfs_set_extent_generation(leaf, item, 1);
btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK);
block_info = (struct btrfs_tree_block_info *)(item + 1);
btrfs_set_tree_block_level(leaf, block_info, 1);
iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
if (parent > 0) {
btrfs_set_extent_inline_ref_type(leaf, iref,
BTRFS_SHARED_BLOCK_REF_KEY);
btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
} else {
btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
}
btrfs_free_path(path);
return 0;
}
static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid)
{
struct btrfs_trans_handle trans;
struct btrfs_extent_item *item;
struct btrfs_path *path;
struct btrfs_key key;
u64 refs;
int ret;
init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
path = btrfs_alloc_path();
if (!path) {
test_msg("Couldn't allocate path\n");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_msg("Couldn't find extent ref\n");
btrfs_free_path(path);
return ret;
}
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_extent_item);
refs = btrfs_extent_refs(path->nodes[0], item);
btrfs_set_extent_refs(path->nodes[0], item, refs + 1);
btrfs_release_path(path);
key.objectid = bytenr;
if (parent) {
key.type = BTRFS_SHARED_BLOCK_REF_KEY;
key.offset = parent;
} else {
key.type = BTRFS_TREE_BLOCK_REF_KEY;
key.offset = root_objectid;
}
ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
if (ret)
test_msg("Failed to insert backref\n");
btrfs_free_path(path);
return ret;
}
static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
struct btrfs_trans_handle trans;
struct btrfs_key key;
struct btrfs_path *path;
int ret;
init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
path = btrfs_alloc_path();
if (!path) {
test_msg("Couldn't allocate path\n");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
test_msg("Didn't find our key %d\n", ret);
btrfs_free_path(path);
return ret;
}
btrfs_del_item(&trans, root, path);
btrfs_free_path(path);
return 0;
}
static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 parent, u64 root_objectid)
{
struct btrfs_trans_handle trans;
struct btrfs_extent_item *item;
struct btrfs_path *path;
struct btrfs_key key;
u64 refs;
int ret;
init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
path = btrfs_alloc_path();
if (!path) {
test_msg("Couldn't allocate path\n");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_msg("Couldn't find extent ref\n");
btrfs_free_path(path);
return ret;
}
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_extent_item);
refs = btrfs_extent_refs(path->nodes[0], item);
btrfs_set_extent_refs(path->nodes[0], item, refs - 1);
btrfs_release_path(path);
key.objectid = bytenr;
if (parent) {
key.type = BTRFS_SHARED_BLOCK_REF_KEY;
key.offset = parent;
} else {
key.type = BTRFS_TREE_BLOCK_REF_KEY;
key.offset = root_objectid;
}
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
test_msg("Couldn't find backref %d\n", ret);
btrfs_free_path(path);
return ret;
}
btrfs_del_item(&trans, root, path);
btrfs_free_path(path);
return ret;
}
static int test_no_shared_qgroup(struct btrfs_root *root)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
init_dummy_trans(&trans);
test_msg("Qgroup basic add\n");
ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
}
ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
BTRFS_QGROUP_OPER_ADD_EXCL, 0);
if (ret) {
test_msg("Couldn't add space to a qgroup %d\n", ret);
return ret;
}
ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
if (ret)
return ret;
ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
if (ret) {
test_msg("Delayed qgroup accounting failed %d\n", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
ret = remove_extent_item(root, 4096, 4096);
if (ret)
return -EINVAL;
ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
BTRFS_QGROUP_OPER_SUB_EXCL, 0);
if (ret) {
test_msg("Couldn't remove space from the qgroup %d\n", ret);
return -EINVAL;
}
ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
if (ret) {
test_msg("Qgroup accounting failed %d\n", ret);
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
return 0;
}
/*
* Add a ref for two different roots to make sure the shared value comes out
* right, also remove one of the roots and make sure the exclusive count is
* adjusted properly.
*/
static int test_multiple_refs(struct btrfs_root *root)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
init_dummy_trans(&trans);
test_msg("Qgroup multiple refs test\n");
/* We have 5 created already from the previous test */
ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
}
ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
if (ret)
return ret;
ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
BTRFS_QGROUP_OPER_ADD_EXCL, 0);
if (ret) {
test_msg("Couldn't add space to a qgroup %d\n", ret);
return ret;
}
ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
if (ret) {
test_msg("Delayed qgroup accounting failed %d\n", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
ret = add_tree_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
BTRFS_QGROUP_OPER_ADD_SHARED, 0);
if (ret) {
test_msg("Qgroup record ref failed %d\n", ret);
return ret;
}
ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
if (ret) {
test_msg("Qgroup accounting failed %d\n", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
ret = remove_extent_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
BTRFS_QGROUP_OPER_SUB_SHARED, 0);
if (ret) {
test_msg("Qgroup record ref failed %d\n", ret);
return ret;
}
ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
if (ret) {
test_msg("Qgroup accounting failed %d\n", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
return 0;
}
int btrfs_test_qgroups(void)
{
struct btrfs_root *root;
struct btrfs_root *tmp_root;
int ret = 0;
root = btrfs_alloc_dummy_root();
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
return PTR_ERR(root);
}
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info) {
test_msg("Couldn't allocate dummy fs info\n");
ret = -ENOMEM;
goto out;
}
/*
* Can't use bytenr 0, some things freak out
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
goto out;
}
btrfs_set_header_level(root->node, 0);
btrfs_set_header_nritems(root->node, 0);
root->alloc_bytenr += 8192;
tmp_root = btrfs_alloc_dummy_root();
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
tmp_root->root_key.objectid = 5;
root->fs_info->fs_root = tmp_root;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
test_msg("Couldn't insert fs root %d\n", ret);
goto out;
}
tmp_root = btrfs_alloc_dummy_root();
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
tmp_root->root_key.objectid = 256;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
test_msg("Couldn't insert fs root %d\n", ret);
goto out;
}
/* We are using this root as our extent root */
root->fs_info->extent_root = root;
/*
* Some of the paths we test assume we have a filled out fs_info, so we
* just need to addt he root in there so we don't panic.
*/
root->fs_info->tree_root = root;
root->fs_info->quota_root = root;
root->fs_info->quota_enabled = 1;
test_msg("Running qgroup tests\n");
ret = test_no_shared_qgroup(root);
if (ret)
goto out;
ret = test_multiple_refs(root);
out:
btrfs_free_dummy_root(root);
return ret;
}

2054
fs/btrfs/transaction.c Normal file

File diff suppressed because it is too large Load diff

175
fs/btrfs/transaction.h Normal file
View file

@ -0,0 +1,175 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_TRANSACTION__
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "delayed-ref.h"
#include "ctree.h"
enum btrfs_trans_state {
TRANS_STATE_RUNNING = 0,
TRANS_STATE_BLOCKED = 1,
TRANS_STATE_COMMIT_START = 2,
TRANS_STATE_COMMIT_DOING = 3,
TRANS_STATE_UNBLOCKED = 4,
TRANS_STATE_COMPLETED = 5,
TRANS_STATE_MAX = 6,
};
struct btrfs_transaction {
u64 transid;
/*
* total external writers(USERSPACE/START/ATTACH) in this
* transaction, it must be zero before the transaction is
* being committed
*/
atomic_t num_extwriters;
/*
* total writers in this transaction, it must be zero before the
* transaction can end
*/
atomic_t num_writers;
atomic_t use_count;
/* Be protected by fs_info->trans_lock when we want to change it. */
enum btrfs_trans_state state;
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
struct list_head pending_chunks;
struct list_head pending_ordered;
struct list_head switch_commits;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
};
#define __TRANS_FREEZABLE (1U << 0)
#define __TRANS_USERSPACE (1U << 8)
#define __TRANS_START (1U << 9)
#define __TRANS_ATTACH (1U << 10)
#define __TRANS_JOIN (1U << 11)
#define __TRANS_JOIN_NOLOCK (1U << 12)
#define __TRANS_DUMMY (1U << 13)
#define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH (__TRANS_ATTACH)
#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \
__TRANS_ATTACH)
#define BTRFS_SEND_TRANS_STUB ((void *)1)
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long blocks_used;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
short aborted;
short adding_csums;
bool allocating_chunk;
bool reloc_reserved;
bool sync;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
* start_transaction is the same as the one passed to end_transaction.
* Subvolume quota depends on this
*/
struct btrfs_root *root;
struct seq_list delayed_ref_elem;
struct list_head ordered;
struct list_head qgroup_ref_list;
struct list_head new_bgs;
};
struct btrfs_pending_snapshot {
struct dentry *dentry;
struct inode *dir;
struct btrfs_root *root;
struct btrfs_root *snap;
struct btrfs_qgroup_inherit *inherit;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
u64 qgroup_reserved;
/* extra metadata reseration for relocation */
int error;
bool readonly;
struct list_head list;
};
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode)
{
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root, int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root);
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_throttle(struct btrfs_root *root);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
void btrfs_put_transaction(struct btrfs_transaction *transaction);
#endif

139
fs/btrfs/tree-defrag.c Normal file
View file

@ -0,0 +1,139 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "locking.h"
/*
* Defrag all the leaves in a given btree.
* Read all the leaves and try to get key order to
* better reflect disk order
*/
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_path *path = NULL;
struct btrfs_key key;
int ret = 0;
int wret;
int level;
int next_key_ret = 0;
u64 last_ret = 0;
u64 min_trans = 0;
if (root->fs_info->extent_root == root) {
/*
* there's recursion here right now in the tree locking,
* we can't defrag the extent root without deadlock
*/
goto out;
}
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
goto out;
if (btrfs_test_opt(root, SSD))
goto out;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
level = btrfs_header_level(root->node);
if (level == 0)
goto out;
if (root->defrag_progress.objectid == 0) {
struct extent_buffer *root_node;
u32 nritems;
root_node = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(root_node);
nritems = btrfs_header_nritems(root_node);
root->defrag_max.objectid = 0;
/* from above we know this is not a leaf */
btrfs_node_key_to_cpu(root_node, &root->defrag_max,
nritems - 1);
btrfs_tree_unlock(root_node);
free_extent_buffer(root_node);
memset(&key, 0, sizeof(key));
} else {
memcpy(&key, &root->defrag_progress, sizeof(key));
}
path->keep_locks = 1;
ret = btrfs_search_forward(root, &key, path, min_trans);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
goto out;
}
btrfs_release_path(path);
wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (wret < 0) {
ret = wret;
goto out;
}
if (!path->nodes[1]) {
ret = 0;
goto out;
}
path->slots[1] = btrfs_header_nritems(path->nodes[1]);
next_key_ret = btrfs_find_next_key(root, path, &key, 1,
min_trans);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
&last_ret,
&root->defrag_progress);
if (ret) {
WARN_ON(ret == -EAGAIN);
goto out;
}
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;
}
out:
if (path)
btrfs_free_path(path);
if (ret == -EAGAIN) {
if (root->defrag_max.objectid > root->defrag_progress.objectid)
goto done;
if (root->defrag_max.type > root->defrag_progress.type)
goto done;
if (root->defrag_max.offset > root->defrag_progress.offset)
goto done;
ret = 0;
}
done:
if (ret != -EAGAIN) {
memset(&root->defrag_progress, 0,
sizeof(root->defrag_progress));
root->defrag_trans_start = trans->transid;
}
return ret;
}

4659
fs/btrfs/tree-log.c Normal file

File diff suppressed because it is too large Load diff

83
fs/btrfs/tree-log.h Normal file
View file

@ -0,0 +1,83 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __TREE_LOG_
#define __TREE_LOG_
#include "ctree.h"
#include "transaction.h"
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
struct btrfs_log_ctx {
int log_ret;
int log_transid;
int io_err;
struct list_head list;
};
static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
{
ctx->log_ret = 0;
ctx->log_transid = 0;
ctx->io_err = 0;
INIT_LIST_HEAD(&ctx->list);
}
static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
}
static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
trans->transid;
}
int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_log_ctx *ctx);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
const loff_t start,
const loff_t end,
struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct inode *dir, u64 index);
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
int for_rename);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *old_dir,
struct dentry *parent);
#endif

251
fs/btrfs/ulist.c Normal file
View file

@ -0,0 +1,251 @@
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*/
#include <linux/slab.h>
#include "ulist.h"
#include "ctree.h"
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
* A sample usage for ulists is the enumeration of directed graphs without
* visiting a node twice. The pseudo-code could look like this:
*
* ulist = ulist_alloc();
* ulist_add(ulist, root);
* ULIST_ITER_INIT(&uiter);
*
* while ((elem = ulist_next(ulist, &uiter)) {
* for (all child nodes n in elem)
* ulist_add(ulist, n);
* do something useful with the node;
* }
* ulist_free(ulist);
*
* This assumes the graph nodes are adressable by u64. This stems from the
* usage for tree enumeration in btrfs, where the logical addresses are
* 64 bit.
*
* It is also useful for tree enumeration which could be done elegantly
* recursively, but is not possible due to kernel stack limitations. The
* loop would be similar to the above.
*/
/**
* ulist_init - freshly initialize a ulist
* @ulist: the ulist to initialize
*
* Note: don't use this function to init an already used ulist, use
* ulist_reinit instead.
*/
void ulist_init(struct ulist *ulist)
{
INIT_LIST_HEAD(&ulist->nodes);
ulist->root = RB_ROOT;
ulist->nnodes = 0;
}
/**
* ulist_fini - free up additionally allocated memory for the ulist
* @ulist: the ulist from which to free the additional memory
*
* This is useful in cases where the base 'struct ulist' has been statically
* allocated.
*/
static void ulist_fini(struct ulist *ulist)
{
struct ulist_node *node;
struct ulist_node *next;
list_for_each_entry_safe(node, next, &ulist->nodes, list) {
kfree(node);
}
ulist->root = RB_ROOT;
INIT_LIST_HEAD(&ulist->nodes);
}
/**
* ulist_reinit - prepare a ulist for reuse
* @ulist: ulist to be reused
*
* Free up all additional memory allocated for the list elements and reinit
* the ulist.
*/
void ulist_reinit(struct ulist *ulist)
{
ulist_fini(ulist);
ulist_init(ulist);
}
/**
* ulist_alloc - dynamically allocate a ulist
* @gfp_mask: allocation flags to for base allocation
*
* The allocated ulist will be returned in an initialized state.
*/
struct ulist *ulist_alloc(gfp_t gfp_mask)
{
struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
if (!ulist)
return NULL;
ulist_init(ulist);
return ulist;
}
/**
* ulist_free - free dynamically allocated ulist
* @ulist: ulist to free
*
* It is not necessary to call ulist_fini before.
*/
void ulist_free(struct ulist *ulist)
{
if (!ulist)
return;
ulist_fini(ulist);
kfree(ulist);
}
static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
{
struct rb_node *n = ulist->root.rb_node;
struct ulist_node *u = NULL;
while (n) {
u = rb_entry(n, struct ulist_node, rb_node);
if (u->val < val)
n = n->rb_right;
else if (u->val > val)
n = n->rb_left;
else
return u;
}
return NULL;
}
static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
{
struct rb_node **p = &ulist->root.rb_node;
struct rb_node *parent = NULL;
struct ulist_node *cur = NULL;
while (*p) {
parent = *p;
cur = rb_entry(parent, struct ulist_node, rb_node);
if (cur->val < ins->val)
p = &(*p)->rb_right;
else if (cur->val > ins->val)
p = &(*p)->rb_left;
else
return -EEXIST;
}
rb_link_node(&ins->rb_node, parent, p);
rb_insert_color(&ins->rb_node, &ulist->root);
return 0;
}
/**
* ulist_add - add an element to the ulist
* @ulist: ulist to add the element to
* @val: value to add to ulist
* @aux: auxiliary value to store along with val
* @gfp_mask: flags to use for allocation
*
* Note: locking must be provided by the caller. In case of rwlocks write
* locking is needed
*
* Add an element to a ulist. The @val will only be added if it doesn't
* already exist. If it is added, the auxiliary value @aux is stored along with
* it. In case @val already exists in the ulist, @aux is ignored, even if
* it differs from the already stored value.
*
* ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
* inserted.
* In case of allocation failure -ENOMEM is returned and the ulist stays
* unaltered.
*/
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
{
return ulist_add_merge(ulist, val, aux, NULL, gfp_mask);
}
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask)
{
int ret;
struct ulist_node *node;
node = ulist_rbtree_search(ulist, val);
if (node) {
if (old_aux)
*old_aux = node->aux;
return 0;
}
node = kmalloc(sizeof(*node), gfp_mask);
if (!node)
return -ENOMEM;
node->val = val;
node->aux = aux;
#ifdef CONFIG_BTRFS_DEBUG
node->seqnum = ulist->nnodes;
#endif
ret = ulist_rbtree_insert(ulist, node);
ASSERT(!ret);
list_add_tail(&node->list, &ulist->nodes);
ulist->nnodes++;
return 1;
}
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
* @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator)
*
* Note: locking must be provided by the caller. In case of rwlocks only read
* locking is needed
*
* This function is used to iterate an ulist.
* It returns the next element from the ulist or %NULL when the
* end is reached. No guarantee is made with respect to the order in which
* the elements are returned. They might neither be returned in order of
* addition nor in ascending order.
* It is allowed to call ulist_add during an enumeration. Newly added items
* are guaranteed to show up in the running enumeration.
*/
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
{
struct ulist_node *node;
if (list_empty(&ulist->nodes))
return NULL;
if (uiter->cur_list && uiter->cur_list->next == &ulist->nodes)
return NULL;
if (uiter->cur_list) {
uiter->cur_list = uiter->cur_list->next;
} else {
uiter->cur_list = ulist->nodes.next;
#ifdef CONFIG_BTRFS_DEBUG
uiter->i = 0;
#endif
}
node = list_entry(uiter->cur_list, struct ulist_node, list);
#ifdef CONFIG_BTRFS_DEBUG
ASSERT(node->seqnum == uiter->i);
ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
uiter->i++;
#endif
return node;
}

80
fs/btrfs/ulist.h Normal file
View file

@ -0,0 +1,80 @@
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*
*/
#ifndef __ULIST__
#define __ULIST__
#include <linux/list.h>
#include <linux/rbtree.h>
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
*/
struct ulist_iterator {
#ifdef CONFIG_BTRFS_DEBUG
int i;
#endif
struct list_head *cur_list; /* hint to start search */
};
/*
* element of the list
*/
struct ulist_node {
u64 val; /* value to store */
u64 aux; /* auxiliary value saved along with the val */
#ifdef CONFIG_BTRFS_DEBUG
int seqnum; /* sequence number this node is added */
#endif
struct list_head list; /* used to link node */
struct rb_node rb_node; /* used to speed up search */
};
struct ulist {
/*
* number of elements stored in list
*/
unsigned long nnodes;
struct list_head nodes;
struct rb_root root;
};
void ulist_init(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(gfp_t gfp_mask);
void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask);
/* just like ulist_add_merge() but take a pointer for the aux data */
static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
void **old_aux, gfp_t gfp_mask)
{
#if BITS_PER_LONG == 32
u64 old64 = (uintptr_t)*old_aux;
int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
*old_aux = (void *)((uintptr_t)old64);
return ret;
#else
return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
#endif
}
struct ulist_node *ulist_next(struct ulist *ulist,
struct ulist_iterator *uiter);
#define ULIST_ITER_INIT(uiter) ((uiter)->cur_list = NULL)
#endif

354
fs/btrfs/uuid-tree.c Normal file
View file

@ -0,0 +1,354 @@
/*
* Copyright (C) STRATO AG 2013. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/uuid.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
{
key->type = type;
key->objectid = get_unaligned_le64(uuid);
key->offset = get_unaligned_le64(uuid + sizeof(u64));
}
/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
u8 type, u64 subid)
{
int ret;
struct btrfs_path *path = NULL;
struct extent_buffer *eb;
int slot;
u32 item_size;
unsigned long offset;
struct btrfs_key key;
if (WARN_ON_ONCE(!uuid_root)) {
ret = -ENOENT;
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
btrfs_uuid_to_key(uuid, type, &key);
ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
if (ret < 0) {
goto out;
} else if (ret > 0) {
ret = -ENOENT;
goto out;
}
eb = path->nodes[0];
slot = path->slots[0];
item_size = btrfs_item_size_nr(eb, slot);
offset = btrfs_item_ptr_offset(eb, slot);
ret = -ENOENT;
if (!IS_ALIGNED(item_size, sizeof(u64))) {
btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
(unsigned long)item_size);
goto out;
}
while (item_size) {
__le64 data;
read_extent_buffer(eb, &data, offset, sizeof(data));
if (le64_to_cpu(data) == subid) {
ret = 0;
break;
}
offset += sizeof(data);
item_size -= sizeof(data);
}
out:
btrfs_free_path(path);
return ret;
}
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
struct btrfs_root *uuid_root, u8 *uuid, u8 type,
u64 subid_cpu)
{
int ret;
struct btrfs_path *path = NULL;
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
unsigned long offset;
__le64 subid_le;
ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
if (ret != -ENOENT)
return ret;
if (WARN_ON_ONCE(!uuid_root)) {
ret = -EINVAL;
goto out;
}
btrfs_uuid_to_key(uuid, type, &key);
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
sizeof(subid_le));
if (ret >= 0) {
/* Add an item for the type for the first time */
eb = path->nodes[0];
slot = path->slots[0];
offset = btrfs_item_ptr_offset(eb, slot);
} else if (ret == -EEXIST) {
/*
* An item with that type already exists.
* Extend the item and store the new subid at the end.
*/
btrfs_extend_item(uuid_root, path, sizeof(subid_le));
eb = path->nodes[0];
slot = path->slots[0];
offset = btrfs_item_ptr_offset(eb, slot);
offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
} else if (ret < 0) {
btrfs_warn(uuid_root->fs_info, "insert uuid item failed %d "
"(0x%016llx, 0x%016llx) type %u!",
ret, (unsigned long long)key.objectid,
(unsigned long long)key.offset, type);
goto out;
}
ret = 0;
subid_le = cpu_to_le64(subid_cpu);
write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
btrfs_mark_buffer_dirty(eb);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
struct btrfs_root *uuid_root, u8 *uuid, u8 type,
u64 subid)
{
int ret;
struct btrfs_path *path = NULL;
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
unsigned long offset;
u32 item_size;
unsigned long move_dst;
unsigned long move_src;
unsigned long move_len;
if (WARN_ON_ONCE(!uuid_root)) {
ret = -EINVAL;
goto out;
}
btrfs_uuid_to_key(uuid, type, &key);
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
if (ret < 0) {
btrfs_warn(uuid_root->fs_info, "error %d while searching for uuid item!",
ret);
goto out;
}
if (ret > 0) {
ret = -ENOENT;
goto out;
}
eb = path->nodes[0];
slot = path->slots[0];
offset = btrfs_item_ptr_offset(eb, slot);
item_size = btrfs_item_size_nr(eb, slot);
if (!IS_ALIGNED(item_size, sizeof(u64))) {
btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
(unsigned long)item_size);
ret = -ENOENT;
goto out;
}
while (item_size) {
__le64 read_subid;
read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
if (le64_to_cpu(read_subid) == subid)
break;
offset += sizeof(read_subid);
item_size -= sizeof(read_subid);
}
if (!item_size) {
ret = -ENOENT;
goto out;
}
item_size = btrfs_item_size_nr(eb, slot);
if (item_size == sizeof(subid)) {
ret = btrfs_del_item(trans, uuid_root, path);
goto out;
}
move_dst = offset;
move_src = offset + sizeof(subid);
move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
memmove_extent_buffer(eb, move_dst, move_src, move_len);
btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
out:
btrfs_free_path(path);
return ret;
}
static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
u64 subid)
{
struct btrfs_trans_handle *trans;
int ret;
/* 1 - for the uuid item */
trans = btrfs_start_transaction(uuid_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid);
btrfs_end_transaction(trans, uuid_root);
out:
return ret;
}
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
u64))
{
struct btrfs_root *root = fs_info->uuid_root;
struct btrfs_key key;
struct btrfs_path *path;
int ret = 0;
struct extent_buffer *leaf;
int slot;
u32 item_size;
unsigned long offset;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
key.objectid = 0;
key.type = 0;
key.offset = 0;
again_search_slot:
ret = btrfs_search_forward(root, &key, path, 0);
if (ret) {
if (ret > 0)
ret = 0;
goto out;
}
while (1) {
cond_resched();
leaf = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.type != BTRFS_UUID_KEY_SUBVOL &&
key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
goto skip;
offset = btrfs_item_ptr_offset(leaf, slot);
item_size = btrfs_item_size_nr(leaf, slot);
if (!IS_ALIGNED(item_size, sizeof(u64))) {
btrfs_warn(fs_info, "uuid item with illegal size %lu!",
(unsigned long)item_size);
goto skip;
}
while (item_size) {
u8 uuid[BTRFS_UUID_SIZE];
__le64 subid_le;
u64 subid_cpu;
put_unaligned_le64(key.objectid, uuid);
put_unaligned_le64(key.offset, uuid + sizeof(u64));
read_extent_buffer(leaf, &subid_le, offset,
sizeof(subid_le));
subid_cpu = le64_to_cpu(subid_le);
ret = check_func(fs_info, uuid, key.type, subid_cpu);
if (ret < 0)
goto out;
if (ret > 0) {
btrfs_release_path(path);
ret = btrfs_uuid_iter_rem(root, uuid, key.type,
subid_cpu);
if (ret == 0) {
/*
* this might look inefficient, but the
* justification is that it is an
* exception that check_func returns 1,
* and that in the regular case only one
* entry per UUID exists.
*/
goto again_search_slot;
}
if (ret < 0 && ret != -ENOENT)
goto out;
}
item_size -= sizeof(subid_le);
offset += sizeof(subid_le);
}
skip:
ret = btrfs_next_item(root, path);
if (ret == 0)
continue;
else if (ret > 0)
ret = 0;
break;
}
out:
btrfs_free_path(path);
if (ret)
btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
return 0;
}

6659
fs/btrfs/volumes.c Normal file

File diff suppressed because it is too large Load diff

516
fs/btrfs/volumes.h Normal file
View file

@ -0,0 +1,516 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_VOLUMES_
#define __BTRFS_VOLUMES_
#include <linux/bio.h>
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN (64 * 1024)
struct buffer_head;
struct btrfs_pending_bios {
struct bio *head;
struct bio *tail;
};
/*
* Use sequence counter to get consistent device stat data on
* 32-bit processors.
*/
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device) \
seqcount_init(&device->data_seqcount)
#else
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
struct btrfs_fs_devices *fs_devices;
struct btrfs_root *dev_root;
struct rcu_string *name;
u64 generation;
spinlock_t io_lock ____cacheline_aligned;
int running_pending;
/* regular prio bios */
struct btrfs_pending_bios pending_bios;
/* WRITE_SYNC bios */
struct btrfs_pending_bios pending_sync_bios;
struct block_device *bdev;
/* the mode sent to blkdev_get */
fmode_t mode;
int writeable;
int in_fs_metadata;
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
seqcount_t data_seqcount;
#endif
/* the internal btrfs device id */
u64 devid;
/* size of the device in memory */
u64 total_bytes;
/* size of the device on disk */
u64 disk_total_bytes;
/* bytes used */
u64 bytes_used;
/* optimal io alignment for this device */
u32 io_align;
/* optimal io width for this device */
u32 io_width;
/* type and info about this device */
u64 type;
/* minimal io size for this device */
u32 sector_size;
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
/*
* size of the device on the current transaction
*
* This variant is update when committing the transaction,
* and protected by device_list_mutex
*/
u64 commit_total_bytes;
/* bytes used on the current transaction */
u64 commit_bytes_used;
/*
* used to manage the device which is resized
*
* It is protected by chunk_lock.
*/
struct list_head resized_list;
/* for sending down flush barriers */
int nobarriers;
struct bio *flush_bio;
struct completion flush_wait;
/* per-device scrub information */
struct scrub_ctx *scrub_device;
struct btrfs_work work;
struct rcu_head rcu;
struct work_struct rcu_work;
/* readahead state */
spinlock_t reada_lock;
atomic_t reada_in_flight;
u64 reada_next;
struct reada_zone *reada_curr_zone;
struct radix_tree_root reada_zones;
struct radix_tree_root reada_extents;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
int dev_stats_valid;
/* Counter to record the change of device stats */
atomic_t dev_stats_ccnt;
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
};
/*
* If we read those variants at the context of their own lock, we needn't
* use the following helpers, reading them directly is safe.
*/
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#define BTRFS_DEVICE_GETSET_FUNCS(name) \
static inline u64 \
btrfs_device_get_##name(const struct btrfs_device *dev) \
{ \
u64 size; \
unsigned int seq; \
\
do { \
seq = read_seqcount_begin(&dev->data_seqcount); \
size = dev->name; \
} while (read_seqcount_retry(&dev->data_seqcount, seq)); \
return size; \
} \
\
static inline void \
btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
{ \
preempt_disable(); \
write_seqcount_begin(&dev->data_seqcount); \
dev->name = size; \
write_seqcount_end(&dev->data_seqcount); \
preempt_enable(); \
}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
#define BTRFS_DEVICE_GETSET_FUNCS(name) \
static inline u64 \
btrfs_device_get_##name(const struct btrfs_device *dev) \
{ \
u64 size; \
\
preempt_disable(); \
size = dev->name; \
preempt_enable(); \
return size; \
} \
\
static inline void \
btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
{ \
preempt_disable(); \
dev->name = size; \
preempt_enable(); \
}
#else
#define BTRFS_DEVICE_GETSET_FUNCS(name) \
static inline u64 \
btrfs_device_get_##name(const struct btrfs_device *dev) \
{ \
return dev->name; \
} \
\
static inline void \
btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
{ \
dev->name = size; \
}
#endif
BTRFS_DEVICE_GETSET_FUNCS(total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
u64 num_devices;
u64 open_devices;
u64 rw_devices;
u64 missing_devices;
u64 total_rw_bytes;
u64 total_devices;
struct block_device *latest_bdev;
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
* worrying about add/remove by the multi-device code.
* Scrubbing super can kick off supers writing by holding
* this mutex lock.
*/
struct mutex device_list_mutex;
struct list_head devices;
struct list_head resized_devices;
/* devices not currently being allocated */
struct list_head alloc_list;
struct list_head list;
struct btrfs_fs_devices *seed;
int seeding;
int opened;
/* set when we find or add a device that doesn't have the
* nonrot flag set
*/
int rotating;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
/*
* we need the mirror number and stripe index to be passed around
* the call chain while we are processing end_io (especially errors).
* Really, what we need is a btrfs_bio structure that has this info
* and is properly sized with its stripe array, but we're not there
* quite yet. We have our own btrfs bioset, and all of the bios
* we allocate are actually btrfs_io_bios. We'll cram as much of
* struct btrfs_bio as we can into this over time.
*/
typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
struct btrfs_io_bio {
unsigned int mirror_num;
unsigned int stripe_index;
u64 logical;
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
u8 *csum_allocated;
btrfs_io_bio_end_io_t *end_io;
struct bio bio;
};
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
{
return container_of(bio, struct btrfs_io_bio, bio);
}
struct btrfs_bio_stripe {
struct btrfs_device *dev;
u64 physical;
u64 length; /* only used for discard mappings */
};
struct btrfs_bio;
typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
#define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1
struct btrfs_bio {
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
bio_end_io_t *end_io;
struct bio *orig_bio;
unsigned long flags;
void *private;
atomic_t error;
int max_errors;
int num_stripes;
int mirror_num;
struct btrfs_bio_stripe stripes[];
};
struct btrfs_device_info {
struct btrfs_device *dev;
u64 dev_offset;
u64 max_avail;
u64 total_avail;
};
struct btrfs_raid_attr {
int sub_stripes; /* sub_stripes info for map */
int dev_stripes; /* stripes per dev */
int devs_max; /* max devs to use */
int devs_min; /* min devs needed */
int devs_increment; /* ndevs has to be a multiple of this */
int ncopies; /* how many copies to data has */
};
struct map_lookup {
u64 type;
int io_align;
int io_width;
int stripe_len;
int sector_size;
int num_stripes;
int sub_stripes;
struct btrfs_bio_stripe stripes[];
};
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
/*
* Restriper's general type filter
*/
#define BTRFS_BALANCE_DATA (1ULL << 0)
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
#define BTRFS_BALANCE_METADATA (1ULL << 2)
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
BTRFS_BALANCE_SYSTEM | \
BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/*
* Balance filters
*/
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
* half-filled).
*/
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_fs_info *fs_info;
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
struct btrfs_balance_args sys;
u64 flags;
struct btrfs_balance_progress stat;
};
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_root *root);
int btrfs_read_chunk_tree(struct btrfs_root *root);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step);
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
const u8 *uuid);
int btrfs_rm_device(struct btrfs_root *root, char *device_path);
void btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size);
struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
struct btrfs_device *srcdev,
struct btrfs_device **device_out);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats);
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
int btrfs_scratch_superblock(struct btrfs_device *device);
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
u64 logical, u64 len, int mirror_num);
unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
struct btrfs_mapping_tree *map_tree,
u64 logical);
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root,
u64 chunk_offset, u64 chunk_size);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 chunk_offset);
static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev)
{
return atomic_read(&dev->dev_stats_ccnt);
}
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
{
atomic_inc(dev->dev_stat_values + index);
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
int index)
{
return atomic_read(dev->dev_stat_values + index);
}
static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
int index)
{
int ret;
ret = atomic_xchg(dev->dev_stat_values + index, 0);
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
return ret;
}
static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
int index, unsigned long val)
{
atomic_set(dev->dev_stat_values + index, val);
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
int index)
{
btrfs_dev_stat_set(dev, index, 0);
}
void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
struct btrfs_transaction *transaction);
#endif

477
fs/btrfs/xattr.c Normal file
View file

@ -0,0 +1,477 @@
/*
* Copyright (C) 2007 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
#include "xattr.h"
#include "disk-io.h"
#include "props.h"
ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size)
{
struct btrfs_dir_item *di;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
int ret = 0;
unsigned long data_ptr;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
/* lookup the xattr by name */
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
strlen(name), 0);
if (!di) {
ret = -ENODATA;
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
leaf = path->nodes[0];
/* if size is 0, that means we want the size of the attr */
if (!size) {
ret = btrfs_dir_data_len(leaf, di);
goto out;
}
/* now get the data out of our dir_item */
if (btrfs_dir_data_len(leaf, di) > size) {
ret = -ERANGE;
goto out;
}
/*
* The way things are packed into the leaf is like this
* |struct btrfs_dir_item|name|data|
* where name is the xattr name, so security.foo, and data is the
* content of the xattr. data_ptr points to the location in memory
* where the data starts in the in memory leaf
*/
data_ptr = (unsigned long)((char *)(di + 1) +
btrfs_dir_name_len(leaf, di));
read_extent_buffer(leaf, buffer, data_ptr,
btrfs_dir_data_len(leaf, di));
ret = btrfs_dir_data_len(leaf, di);
out:
btrfs_free_path(path);
return ret;
}
static int do_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
struct btrfs_dir_item *di;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
size_t name_len = strlen(name);
int ret = 0;
if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
return -ENOSPC;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (flags & XATTR_REPLACE) {
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
name_len, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
} else if (!di) {
ret = -ENODATA;
goto out;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto out;
btrfs_release_path(path);
/*
* remove the attribute
*/
if (!value)
goto out;
} else {
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
if (!di && !value)
goto out;
btrfs_release_path(path);
}
again:
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
/*
* If we're setting an xattr to a new value but the new value is say
* exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
* back from split_leaf. This is because it thinks we'll be extending
* the existing item size, but we're asking for enough space to add the
* item itself. So if we get EOVERFLOW just set ret to EEXIST and let
* the rest of the function figure it out.
*/
if (ret == -EOVERFLOW)
ret = -EEXIST;
if (ret == -EEXIST) {
if (flags & XATTR_CREATE)
goto out;
/*
* We can't use the path we already have since we won't have the
* proper locking for a delete, so release the path and
* re-lookup to delete the thing.
*/
btrfs_release_path(path);
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
name, name_len, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
} else if (!di) {
/* Shouldn't happen but just in case... */
btrfs_release_path(path);
goto again;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto out;
/*
* We have a value to set, so go back and try to insert it now.
*/
if (value) {
btrfs_release_path(path);
goto again;
}
}
out:
btrfs_free_path(path);
return ret;
}
/*
* @value: "" makes the attribute to empty, NULL removes it
*/
int __btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
if (trans)
return do_setxattr(trans, inode, name, value, size, flags);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = do_setxattr(trans, inode, name, value, size, flags);
if (ret)
goto out;
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
btrfs_end_transaction(trans, root);
return ret;
}
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct btrfs_key key, found_key;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
int ret = 0, slot;
size_t total_size = 0, size_left = size;
unsigned long name_ptr;
size_t name_len;
/*
* ok we want all objects associated with this id.
* NOTE: we set key.offset = 0; because we want to start with the
* first xattr that we find and walk forward
*/
key.objectid = btrfs_ino(inode);
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->reada = 2;
/* search for our xattrs */
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto err;
while (1) {
leaf = path->nodes[0];
slot = path->slots[0];
/* this is where we start walking through the path */
if (slot >= btrfs_header_nritems(leaf)) {
/*
* if we've reached the last slot in this leaf we need
* to go to the next leaf and reset everything
*/
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto err;
else if (ret > 0)
break;
continue;
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* check to make sure this item is what we want */
if (found_key.objectid != key.objectid)
break;
if (found_key.type != BTRFS_XATTR_ITEM_KEY)
break;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(root, leaf, di))
goto next;
name_len = btrfs_dir_name_len(leaf, di);
total_size += name_len + 1;
/* we are just looking for how big our buffer needs to be */
if (!size)
goto next;
if (!buffer || (name_len + 1) > size_left) {
ret = -ERANGE;
goto err;
}
name_ptr = (unsigned long)(di + 1);
read_extent_buffer(leaf, buffer, name_ptr, name_len);
buffer[name_len] = '\0';
size_left -= name_len + 1;
buffer += name_len + 1;
next:
path->slots[0]++;
}
ret = total_size;
err:
btrfs_free_path(path);
return ret;
}
/*
* List of handlers for synthetic system.* attributes. All real ondisk
* attributes are handled directly.
*/
const struct xattr_handler *btrfs_xattr_handlers[] = {
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
#endif
NULL,
};
/*
* Check if the attribute is in a supported namespace.
*
* This is applied after the check for the synthetic attributes in the system
* namespace.
*/
static int btrfs_is_valid_xattr(const char *name)
{
int len = strlen(name);
int prefixlen = 0;
if (!strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN))
prefixlen = XATTR_SECURITY_PREFIX_LEN;
else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
prefixlen = XATTR_SYSTEM_PREFIX_LEN;
else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
prefixlen = XATTR_TRUSTED_PREFIX_LEN;
else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
prefixlen = XATTR_USER_PREFIX_LEN;
else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
prefixlen = XATTR_BTRFS_PREFIX_LEN;
else
return -EOPNOTSUPP;
/*
* The name cannot consist of just prefix
*/
if (len <= prefixlen)
return -EINVAL;
return 0;
}
ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
int ret;
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
* for it via sb->s_xattr.
*/
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_getxattr(dentry, name, buffer, size);
ret = btrfs_is_valid_xattr(name);
if (ret)
return ret;
return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
}
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
int ret;
/*
* The permission on security.* and system.* is not checked
* in permission().
*/
if (btrfs_root_readonly(root))
return -EROFS;
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
* for it via sb->s_xattr.
*/
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_setxattr(dentry, name, value, size, flags);
ret = btrfs_is_valid_xattr(name);
if (ret)
return ret;
if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
return btrfs_set_prop(dentry->d_inode, name,
value, size, flags);
if (size == 0)
value = ""; /* empty EA, do not remove */
return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
int ret;
/*
* The permission on security.* and system.* is not checked
* in permission().
*/
if (btrfs_root_readonly(root))
return -EROFS;
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
* for it via sb->s_xattr.
*/
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_removexattr(dentry, name);
ret = btrfs_is_valid_xattr(name);
if (ret)
return ret;
if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
return btrfs_set_prop(dentry->d_inode, name,
NULL, 0, XATTR_REPLACE);
return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
XATTR_REPLACE);
}
static int btrfs_initxattrs(struct inode *inode,
const struct xattr *xattr_array, void *fs_info)
{
const struct xattr *xattr;
struct btrfs_trans_handle *trans = fs_info;
char *name;
int err = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
strlen(xattr->name) + 1, GFP_NOFS);
if (!name) {
err = -ENOMEM;
break;
}
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
err = __btrfs_setxattr(trans, inode, name,
xattr->value, xattr->value_len, 0);
kfree(name);
if (err < 0)
break;
}
return err;
}
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr)
{
return security_inode_init_security(inode, dir, qstr,
&btrfs_initxattrs, trans);
}

41
fs/btrfs/xattr.h Normal file
View file

@ -0,0 +1,41 @@
/*
* Copyright (C) 2007 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __XATTR__
#define __XATTR__
#include <linux/xattr.h>
extern const struct xattr_handler *btrfs_xattr_handlers[];
extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size);
extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags);
extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr);
#endif /* __XATTR__ */

412
fs/btrfs/zlib.c Normal file
View file

@ -0,0 +1,412 @@
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*
* Based on jffs2 zlib code:
* Copyright © 2001-2007 Red Hat, Inc.
* Created by David Woodhouse <dwmw2@infradead.org>
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include "compression.h"
struct workspace {
z_stream strm;
char *buf;
struct list_head list;
};
static void zlib_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
vfree(workspace->strm.workspace);
kfree(workspace->buf);
kfree(workspace);
}
static struct list_head *zlib_alloc_workspace(void)
{
struct workspace *workspace;
int workspacesize;
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
if (!workspace)
return ERR_PTR(-ENOMEM);
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
workspace->strm.workspace = vmalloc(workspacesize);
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!workspace->strm.workspace || !workspace->buf)
goto fail;
INIT_LIST_HEAD(&workspace->list);
return &workspace->list;
fail:
zlib_free_workspace(&workspace->list);
return ERR_PTR(-ENOMEM);
}
static int zlib_compress_pages(struct list_head *ws,
struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret;
char *data_in;
char *cpage_out;
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
unsigned long bytes_left;
*out_pages = 0;
*total_out = 0;
*total_in = 0;
if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
printk(KERN_WARNING "BTRFS: deflateInit failed\n");
ret = -EIO;
goto out;
}
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
pages[0] = out_page;
nr_pages = 1;
workspace->strm.next_in = data_in;
workspace->strm.next_out = cpage_out;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE);
while (workspace->strm.total_in < len) {
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {
printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
ret);
zlib_deflateEnd(&workspace->strm);
ret = -EIO;
goto out;
}
/* we're making it bigger, give up */
if (workspace->strm.total_in > 8192 &&
workspace->strm.total_in <
workspace->strm.total_out) {
ret = -E2BIG;
goto out;
}
/* we need another page for writing out. Test this
* before the total_in so we will pull in a new page for
* the stream end if required
*/
if (workspace->strm.avail_out == 0) {
kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
workspace->strm.next_out = cpage_out;
}
/* we're all done */
if (workspace->strm.total_in >= len)
break;
/* we've read in a full page, get a new one */
if (workspace->strm.avail_in == 0) {
if (workspace->strm.total_out > max_out)
break;
bytes_left = len - workspace->strm.total_in;
kunmap(in_page);
page_cache_release(in_page);
start += PAGE_CACHE_SIZE;
in_page = find_get_page(mapping,
start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
workspace->strm.avail_in = min(bytes_left,
PAGE_CACHE_SIZE);
workspace->strm.next_in = data_in;
}
}
workspace->strm.avail_in = 0;
ret = zlib_deflate(&workspace->strm, Z_FINISH);
zlib_deflateEnd(&workspace->strm);
if (ret != Z_STREAM_END) {
ret = -EIO;
goto out;
}
if (workspace->strm.total_out >= workspace->strm.total_in) {
ret = -E2BIG;
goto out;
}
ret = 0;
*total_out = workspace->strm.total_out;
*total_in = workspace->strm.total_in;
out:
*out_pages = nr_pages;
if (out_page)
kunmap(out_page);
if (in_page) {
kunmap(in_page);
page_cache_release(in_page);
}
return ret;
}
static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
u64 disk_start,
struct bio_vec *bvec,
int vcnt,
size_t srclen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
int wbits = MAX_WBITS;
char *data_in;
size_t total_out = 0;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
unsigned long buf_start;
unsigned long pg_offset;
data_in = kmap(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
workspace->strm.next_out = workspace->buf;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
pg_offset = 0;
/* If it's deflate, and it's got no preset dictionary, then
we can tell zlib to skip the adler32 check. */
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
((data_in[0] & 0x0f) == Z_DEFLATED) &&
!(((data_in[0]<<8) + data_in[1]) % 31)) {
wbits = -((data_in[0] >> 4) + 8);
workspace->strm.next_in += 2;
workspace->strm.avail_in -= 2;
}
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
printk(KERN_WARNING "BTRFS: inflateInit failed\n");
return -EIO;
}
while (workspace->strm.total_in < srclen) {
ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
break;
buf_start = total_out;
total_out = workspace->strm.total_out;
/* we didn't make progress in this inflate call, we're done */
if (buf_start == total_out)
break;
ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
total_out, disk_start,
bvec, vcnt,
&page_out_index, &pg_offset);
if (ret2 == 0) {
ret = 0;
goto done;
}
workspace->strm.next_out = workspace->buf;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
if (workspace->strm.avail_in == 0) {
unsigned long tmp;
kunmap(pages_in[page_in_index]);
page_in_index++;
if (page_in_index >= total_pages_in) {
data_in = NULL;
break;
}
data_in = kmap(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
tmp = srclen - workspace->strm.total_in;
workspace->strm.avail_in = min(tmp,
PAGE_CACHE_SIZE);
}
}
if (ret != Z_STREAM_END)
ret = -EIO;
else
ret = 0;
done:
zlib_inflateEnd(&workspace->strm);
if (data_in)
kunmap(pages_in[page_in_index]);
if (!ret)
btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
unsigned long bytes_left;
unsigned long total_out = 0;
unsigned long pg_offset = 0;
char *kaddr;
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes_left = destlen;
workspace->strm.next_in = data_in;
workspace->strm.avail_in = srclen;
workspace->strm.total_in = 0;
workspace->strm.next_out = workspace->buf;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
workspace->strm.total_out = 0;
/* If it's deflate, and it's got no preset dictionary, then
we can tell zlib to skip the adler32 check. */
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
((data_in[0] & 0x0f) == Z_DEFLATED) &&
!(((data_in[0]<<8) + data_in[1]) % 31)) {
wbits = -((data_in[0] >> 4) + 8);
workspace->strm.next_in += 2;
workspace->strm.avail_in -= 2;
}
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
printk(KERN_WARNING "BTRFS: inflateInit failed\n");
return -EIO;
}
while (bytes_left > 0) {
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
break;
buf_start = total_out;
total_out = workspace->strm.total_out;
if (total_out == buf_start) {
ret = -EIO;
break;
}
if (total_out <= start_byte)
goto next;
if (total_out > start_byte && buf_start < start_byte)
buf_offset = start_byte - buf_start;
else
buf_offset = 0;
bytes = min(PAGE_CACHE_SIZE - pg_offset,
PAGE_CACHE_SIZE - buf_offset);
bytes = min(bytes, bytes_left);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
kunmap_atomic(kaddr);
pg_offset += bytes;
bytes_left -= bytes;
next:
workspace->strm.next_out = workspace->buf;
workspace->strm.avail_out = PAGE_CACHE_SIZE;
}
if (ret != Z_STREAM_END && bytes_left != 0)
ret = -EIO;
else
ret = 0;
zlib_inflateEnd(&workspace->strm);
/*
* this should only happen if zlib returned fewer bytes than we
* expected. btrfs_get_block is responsible for zeroing from the
* end of the inline extent (destlen) to the end of the page
*/
if (pg_offset < destlen) {
kaddr = kmap_atomic(dest_page);
memset(kaddr + pg_offset, 0, destlen - pg_offset);
kunmap_atomic(kaddr);
}
return ret;
}
struct btrfs_compress_op btrfs_zlib_compress = {
.alloc_workspace = zlib_alloc_workspace,
.free_workspace = zlib_free_workspace,
.compress_pages = zlib_compress_pages,
.decompress_biovec = zlib_decompress_biovec,
.decompress = zlib_decompress,
};