Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

24
fs/nilfs2/Kconfig Normal file
View file

@ -0,0 +1,24 @@
# NILFS2 log-structured file system support.  The option is a tristate and
# enabling it also selects CRC32.
config NILFS2_FS
tristate "NILFS2 file system support"
select CRC32
help
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire
file system, users can even restore files mistakenly overwritten or
destroyed just a few seconds ago. Since this file system can keep
consistency like conventional LFS, it achieves quick recovery after
system crashes.
NILFS2 creates a number of checkpoints every few seconds or per
synchronous write basis (unless there is no change). Users can
select significant versions among continuously created checkpoints,
and can change them into snapshots which will be preserved for long
periods until they are changed back to checkpoints. Each
snapshot is mountable as a read-only file system concurrently with
its writable mount, and this feature is convenient for online backup.
Some features including atime, extended attributes, and POSIX ACLs,
are not supported yet.
To compile this file system support as a module, choose M here: the
module will be called nilfs2. If unsure, say N.

5
fs/nilfs2/Makefile Normal file
View file

@ -0,0 +1,5 @@
# nilfs2.o is added to the object list selected by CONFIG_NILFS2_FS;
# nilfs2-y lists the object files that belong to it.
obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
ifile.o alloc.o gcinode.o ioctl.o sysfs.o

784
fs/nilfs2/alloc.c Normal file
View file

@ -0,0 +1,784 @@
/*
* alloc.c - NILFS dat/inode allocator
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Original code was written by Koji Sato <koji@osrg.net>.
* Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
* Amagai Yoshiji <amagai@osrg.net>.
*/
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "mdt.h"
#include "alloc.h"
/**
 * nilfs_palloc_groups_per_desc_block - number of group descriptors per block
 * @inode: inode of metadata file using this allocator
 *
 * Return: how many struct nilfs_palloc_group_desc records fit into one block
 * of @inode, i.e. the block size divided by the descriptor size.
 */
static inline unsigned long
nilfs_palloc_groups_per_desc_block(const struct inode *inode)
{
	const unsigned long block_size = 1UL << inode->i_blkbits;

	return block_size / sizeof(struct nilfs_palloc_group_desc);
}
/**
 * nilfs_palloc_groups_count - upper bound on the number of groups
 * @inode: inode of metadata file using this allocator
 *
 * Return: 2 to the power of (BITS_PER_LONG - (block-size bits + 3)), where
 * the "+ 3" accounts for the eight bits held by every bitmap byte.
 */
static inline unsigned long
nilfs_palloc_groups_count(const struct inode *inode)
{
	/* log2 of the number of bits in one block: blkbits + log2(8) */
	const unsigned int bits_per_block_shift = inode->i_blkbits + 3;

	return 1UL << (BITS_PER_LONG - bits_per_block_shift);
}
/**
* nilfs_palloc_init_blockgroup - initialize private variables for allocator
* @inode: inode of metadata file using this allocator
* @entry_size: size of the persistent object
*/
int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS);
if (!mi->mi_bgl)
return -ENOMEM;
bgl_lock_init(mi->mi_bgl);
nilfs_mdt_set_entry_size(inode, entry_size, 0);
mi->mi_blocks_per_group =
DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
mi->mi_entries_per_block) + 1;
/* Number of blocks in a group including entry blocks and
a bitmap block */
mi->mi_blocks_per_desc_block =
nilfs_palloc_groups_per_desc_block(inode) *
mi->mi_blocks_per_group + 1;
/* Number of blocks per descriptor including the
descriptor block */
return 0;
}
/**
 * nilfs_palloc_group - split an entry number into group number and offset
 * @inode: inode of metadata file using this allocator
 * @nr: serial number of the entry (e.g. inode number)
 * @offset: place to store the offset of the entry inside its group
 *
 * Return: the number of the group that contains entry @nr.
 */
static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
					unsigned long *offset)
{
	__u64 quotient = nr;

	/* the result of do_div() is the offset; quotient is then returned */
	*offset = do_div(quotient, nilfs_palloc_entries_per_group(inode));
	return quotient;
}
/**
 * nilfs_palloc_desc_blkoff - block offset of a group descriptor block
 * @inode: inode of metadata file using this allocator
 * @group: group number
 *
 * Return: the block offset of the descriptor block that holds the
 * descriptor of @group.
 */
static unsigned long
nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
{
	const unsigned long desc_index =
		group / nilfs_palloc_groups_per_desc_block(inode);
	const unsigned long stride =
		NILFS_MDT(inode)->mi_blocks_per_desc_block;

	return desc_index * stride;
}
/**
 * nilfs_palloc_bitmap_blkoff - block offset of a group's bitmap block
 * @inode: inode of metadata file using this allocator
 * @group: group number
 *
 * Return: the block offset of the bitmap block used to allocate and
 * deallocate entries of @group.
 */
static unsigned long
nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
{
	const unsigned long index_in_desc =
		group % nilfs_palloc_groups_per_desc_block(inode);
	unsigned long blkoff = nilfs_palloc_desc_blkoff(inode, group);

	/* step over the descriptor block, then over the preceding groups */
	blkoff += 1;
	blkoff += index_in_desc * NILFS_MDT(inode)->mi_blocks_per_group;
	return blkoff;
}
/**
* nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
* @inode: inode of metadata file using this allocator
* @group: group number
* @desc: pointer to descriptor structure for the group
*/
static unsigned long
nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
const struct nilfs_palloc_group_desc *desc)
{
unsigned long nfree;
spin_lock(nilfs_mdt_bgl_lock(inode, group));
nfree = le32_to_cpu(desc->pg_nfrees);
spin_unlock(nilfs_mdt_bgl_lock(inode, group));
return nfree;
}
/**
 * nilfs_palloc_group_desc_add_entries - adjust the free entry count
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @desc: pointer to descriptor structure for the group
 * @n: delta to be added
 *
 * Adds @n to @desc->pg_nfrees while holding the block group lock of @group.
 */
static void
nilfs_palloc_group_desc_add_entries(struct inode *inode,
				    unsigned long group,
				    struct nilfs_palloc_group_desc *desc,
				    u32 n)
{
	spinlock_t *lock = nilfs_mdt_bgl_lock(inode, group);

	spin_lock(lock);
	le32_add_cpu(&desc->pg_nfrees, n);
	spin_unlock(lock);
}
/**
 * nilfs_palloc_entry_blkoff - block offset of the block holding an entry
 * @inode: inode of metadata file using this allocator
 * @nr: serial number of the entry (e.g. inode number)
 *
 * Return: the block offset of the entry block that contains entry @nr.
 */
static unsigned long
nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
{
	unsigned long group, offset_in_group, blkoff;

	group = nilfs_palloc_group(inode, nr, &offset_in_group);

	/* entry blocks follow directly after the group's bitmap block */
	blkoff = nilfs_palloc_bitmap_blkoff(inode, group) + 1;
	blkoff += offset_in_group / NILFS_MDT(inode)->mi_entries_per_block;
	return blkoff;
}
/**
 * nilfs_palloc_desc_block_init - initialize a group descriptor block buffer
 * @inode: inode of metadata file
 * @bh: buffer head of the buffer to be initialized
 * @kaddr: kernel address mapped for the page including the buffer
 *
 * Sets the free count of every descriptor in the block to the number of
 * entries per group.
 */
static void nilfs_palloc_desc_block_init(struct inode *inode,
					 struct buffer_head *bh, void *kaddr)
{
	struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
	const unsigned long ndescs = nilfs_palloc_groups_per_desc_block(inode);
	const __le32 all_free =
		cpu_to_le32(nilfs_palloc_entries_per_group(inode));
	unsigned long i;

	for (i = 0; i < ndescs; i++)
		desc[i].pg_nfrees = all_free;
}
/*
 * nilfs_palloc_get_block - get a metadata block through a one-entry cache
 *
 * If @prev already caches the buffer for @blkoff, an extra reference to it
 * is taken and it is returned through @bhp.  Otherwise the block is obtained
 * with nilfs_mdt_get_block() and, on success, replaces the buffer cached in
 * @prev.  @lock protects the accesses to @prev.
 *
 * Returns 0 on success or the error code of nilfs_mdt_get_block().
 */
static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
				  int create,
				  void (*init_block)(struct inode *,
						     struct buffer_head *,
						     void *),
				  struct buffer_head **bhp,
				  struct nilfs_bh_assoc *prev,
				  spinlock_t *lock)
{
	struct buffer_head *cached = NULL;
	int err;

	spin_lock(lock);
	if (prev->bh && prev->blkoff == blkoff) {
		cached = prev->bh;
		get_bh(cached);
	}
	spin_unlock(lock);

	if (cached) {
		*bhp = cached;
		return 0;
	}

	err = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
	if (err)
		return err;

	spin_lock(lock);
	/*
	 * The cache contents may have changed while the lock was dropped
	 * for the get block call, so the cached buffer is simply replaced.
	 */
	brelse(prev->bh);
	get_bh(*bhp);
	prev->bh = *bhp;
	prev->blkoff = blkoff;
	spin_unlock(lock);

	return 0;
}
/**
 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @create: create flag
 * @bhp: pointer to store the resultant buffer head
 *
 * A newly created block is initialized by nilfs_palloc_desc_block_init().
 *
 * Return: the value returned by nilfs_palloc_get_block().
 */
static int nilfs_palloc_get_desc_block(struct inode *inode,
				       unsigned long group,
				       int create, struct buffer_head **bhp)
{
	struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
	const unsigned long blkoff = nilfs_palloc_desc_blkoff(inode, group);

	return nilfs_palloc_get_block(inode, blkoff, create,
				      nilfs_palloc_desc_block_init, bhp,
				      &cache->prev_desc, &cache->lock);
}
/**
 * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @create: create flag
 * @bhp: pointer to store the resultant buffer head
 *
 * Return: the value returned by nilfs_palloc_get_block().
 */
static int nilfs_palloc_get_bitmap_block(struct inode *inode,
					 unsigned long group,
					 int create, struct buffer_head **bhp)
{
	struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
	const unsigned long blkoff = nilfs_palloc_bitmap_blkoff(inode, group);

	return nilfs_palloc_get_block(inode, blkoff, create, NULL, bhp,
				      &cache->prev_bitmap, &cache->lock);
}
/**
 * nilfs_palloc_get_entry_block - get buffer head of an entry block
 * @inode: inode of metadata file using this allocator
 * @nr: serial number of the entry (e.g. inode number)
 * @create: create flag
 * @bhp: pointer to store the resultant buffer head
 *
 * Return: the value returned by nilfs_palloc_get_block().
 */
int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
				 int create, struct buffer_head **bhp)
{
	struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
	const unsigned long blkoff = nilfs_palloc_entry_blkoff(inode, nr);

	return nilfs_palloc_get_block(inode, blkoff, create, NULL, bhp,
				      &cache->prev_entry, &cache->lock);
}
/**
 * nilfs_palloc_block_get_group_desc - locate a group descriptor in a buffer
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @bh: buffer head of the buffer storing the group descriptor block
 * @kaddr: kernel address mapped for the page including the buffer
 *
 * Return: the address of the descriptor of @group inside the mapped buffer.
 */
static struct nilfs_palloc_group_desc *
nilfs_palloc_block_get_group_desc(const struct inode *inode,
				  unsigned long group,
				  const struct buffer_head *bh, void *kaddr)
{
	struct nilfs_palloc_group_desc *first = kaddr + bh_offset(bh);
	const unsigned long index =
		group % nilfs_palloc_groups_per_desc_block(inode);

	return first + index;
}
/**
 * nilfs_palloc_block_get_entry - locate an entry in a mapped entry block
 * @inode: inode of metadata file using this allocator
 * @nr: serial number of the entry (e.g. inode number)
 * @bh: buffer head of the buffer storing the entry block
 * @kaddr: kernel address mapped for the page including the buffer
 *
 * Return: the address of entry @nr inside the mapped buffer.
 */
void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
				   const struct buffer_head *bh, void *kaddr)
{
	const struct nilfs_mdt_info *mi = NILFS_MDT(inode);
	unsigned long offset_in_group, index_in_block;

	/* only the offset inside the group is needed here */
	nilfs_palloc_group(inode, nr, &offset_in_group);
	index_in_block = offset_in_group % mi->mi_entries_per_block;

	return kaddr + bh_offset(bh) + index_in_block * mi->mi_entry_size;
}
/**
 * nilfs_palloc_find_available_slot - find available slot in a group
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @target: offset number of an entry in the group (start point)
 * @bitmap: bitmap of the group
 * @bsize: size in bits
 *
 * Looks for a clear bit in @bitmap and sets it with nilfs_set_bit_atomic()
 * under the block group lock of @group.  The search first covers the bits
 * from @target up to the next BITS_PER_LONG boundary and then walks the
 * bitmap one long word at a time, wrapping around to bit 0 at @bsize.
 *
 * Return: the position of the bit that was claimed, or -ENOSPC if no bit
 * could be claimed.
 */
static int nilfs_palloc_find_available_slot(struct inode *inode,
unsigned long group,
unsigned long target,
unsigned char *bitmap,
int bsize)
{
int curr, pos, end, i;
if (target > 0) {
/* round @target up to a multiple of BITS_PER_LONG, capped at @bsize */
end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
if (end > bsize)
end = bsize;
pos = nilfs_find_next_zero_bit(bitmap, end, target);
/* a zero result from nilfs_set_bit_atomic() is treated as success */
if (pos < end &&
!nilfs_set_bit_atomic(
nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
return pos;
} else
end = 0;
/* visit bsize bits in BITS_PER_LONG steps, starting at the boundary above */
for (i = 0, curr = end;
i < bsize;
i += BITS_PER_LONG, curr += BITS_PER_LONG) {
/* wrap around */
if (curr >= bsize)
curr = 0;
/* retry within this word for as long as it is not completely set */
while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
!= ~0UL) {
end = curr + BITS_PER_LONG;
if (end > bsize)
end = bsize;
pos = nilfs_find_next_zero_bit(bitmap, end, curr);
if ((pos < end) &&
!nilfs_set_bit_atomic(
nilfs_mdt_bgl_lock(inode, group), pos,
bitmap))
return pos;
}
}
return -ENOSPC;
}
/**
 * nilfs_palloc_rest_groups_in_desc_block - groups left in a descriptor block
 * @inode: inode of metadata file using this allocator
 * @curr: current group number
 * @max: maximum number of groups
 *
 * Return: the smaller of the number of groups from @curr to the end of its
 * descriptor block and the number of groups from @curr up to and including
 * @max.
 */
static unsigned long
nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
				       unsigned long curr, unsigned long max)
{
	const unsigned long per_block =
		nilfs_palloc_groups_per_desc_block(inode);
	const unsigned long left_in_block = per_block - curr % per_block;
	const unsigned long left_to_max = max - curr + 1;

	return left_in_block < left_to_max ? left_in_block : left_to_max;
}
/**
 * nilfs_palloc_count_desc_blocks - count the descriptor blocks of a file
 * @inode: inode of metadata file using this allocator
 * @desc_blocks: descriptor blocks number [out]
 *
 * The count is derived from the last key of the file's bmap, rounded up to
 * whole descriptor-block units.  @desc_blocks is left untouched on error.
 *
 * Return: 0 on success or the error code of nilfs_bmap_last_key().
 */
static int nilfs_palloc_count_desc_blocks(struct inode *inode,
					  unsigned long *desc_blocks)
{
	unsigned long last_blkoff;
	int err;

	err = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &last_blkoff);
	if (unlikely(err))
		return err;

	*desc_blocks = DIV_ROUND_UP(
		last_blkoff, NILFS_MDT(inode)->mi_blocks_per_desc_block);
	return 0;
}
/**
 * nilfs_palloc_mdt_file_can_grow - check whether the MDT file may still grow
 * @inode: inode of metadata file using this allocator
 * @desc_blocks: known current descriptor blocks count
 *
 * Return: true if the groups described by @desc_blocks descriptor blocks
 * are fewer than the maximum number of groups.
 */
static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
						  unsigned long desc_blocks)
{
	const unsigned long described_groups =
		nilfs_palloc_groups_per_desc_block(inode) * desc_blocks;

	return described_groups < nilfs_palloc_groups_count(inode);
}
/**
 * nilfs_palloc_count_max_entries - count the entries the file can describe
 * @inode: inode of metadata file using this allocator
 * @nused: current number of used entries
 * @nmaxp: max number of entries [out]
 *
 * The maximum is the capacity of the existing descriptor blocks.  When all
 * of that capacity is in use and the file can still grow, the capacity of
 * one further descriptor block is added.
 *
 * Return: 0 on success, -ERANGE if @nused exceeds the computed maximum, or
 * the error code of nilfs_palloc_count_desc_blocks().
 */
int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
{
	unsigned long ndesc_blocks = 0;
	u64 per_desc_block, limit;
	int ret;

	ret = nilfs_palloc_count_desc_blocks(inode, &ndesc_blocks);
	if (unlikely(ret))
		return ret;

	per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) *
		nilfs_palloc_groups_per_desc_block(inode);
	limit = per_desc_block * ndesc_blocks;

	/* everything is in use: account for one more descriptor block */
	if (nused == limit &&
	    nilfs_palloc_mdt_file_can_grow(inode, ndesc_blocks))
		limit += per_desc_block;

	if (nused > limit)
		return -ERANGE;

	*nmaxp = limit;
	return 0;
}
/**
 * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
 * @inode: inode of metadata file using this allocator
 * @req: nilfs_palloc_req structure exchanged for the allocation
 *
 * Scans the groups starting with the one that contains @req->pr_entry_nr,
 * wrapping around to group 0 after the last group, and claims the first
 * free bitmap slot it finds.  On success @req->pr_entry_nr is set to the
 * allocated entry number, the free count of the group is decremented, and
 * @req->pr_desc_bh and @req->pr_bitmap_bh hold references to the group's
 * descriptor and bitmap buffers.
 *
 * Return: 0 on success, -ENOSPC if no free entry is left, or the negative
 * error code returned while getting a descriptor or bitmap block.
 */
int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
				     struct nilfs_palloc_req *req)
{
	struct buffer_head *desc_bh, *bitmap_bh;
	struct nilfs_palloc_group_desc *desc;
	unsigned char *bitmap;
	void *desc_kaddr, *bitmap_kaddr;
	unsigned long group, maxgroup, ngroups;
	unsigned long group_offset, maxgroup_offset;
	unsigned long n, entries_per_group;
	unsigned long i, j;
	int pos, ret;

	ngroups = nilfs_palloc_groups_count(inode);
	maxgroup = ngroups - 1;
	group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
	entries_per_group = nilfs_palloc_entries_per_group(inode);

	/* each pass handles the groups that share one descriptor block */
	for (i = 0; i < ngroups; i += n) {
		if (group >= ngroups) {
			/* wrap around */
			group = 0;
			maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
						      &maxgroup_offset) - 1;
		}
		ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
		if (ret < 0)
			return ret;
		desc_kaddr = kmap(desc_bh->b_page);
		desc = nilfs_palloc_block_get_group_desc(
			inode, group, desc_bh, desc_kaddr);
		n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
							   maxgroup);
		for (j = 0; j < n; j++, desc++, group++) {
			if (nilfs_palloc_group_desc_nfrees(inode, group, desc)
			    > 0) {
				ret = nilfs_palloc_get_bitmap_block(
					inode, group, 1, &bitmap_bh);
				if (ret < 0)
					goto out_desc;
				bitmap_kaddr = kmap(bitmap_bh->b_page);
				bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
				pos = nilfs_palloc_find_available_slot(
					inode, group, group_offset, bitmap,
					entries_per_group);
				if (pos >= 0) {
					/* found a free entry */
					nilfs_palloc_group_desc_add_entries(
						inode, group, desc, -1);
					req->pr_entry_nr =
						entries_per_group * group + pos;
					kunmap(desc_bh->b_page);
					kunmap(bitmap_bh->b_page);

					/* both buffers stay referenced by @req */
					req->pr_desc_bh = desc_bh;
					req->pr_bitmap_bh = bitmap_bh;
					return 0;
				}
				kunmap(bitmap_bh->b_page);
				brelse(bitmap_bh);
			}

			/* only the first group is searched from an offset */
			group_offset = 0;
		}

		kunmap(desc_bh->b_page);
		brelse(desc_bh);
	}

	/* no entries left */
	return -ENOSPC;

out_desc:
	kunmap(desc_bh->b_page);
	brelse(desc_bh);
	return ret;
}
/**
 * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object
 * @inode: inode of metadata file using this allocator
 * @req: nilfs_palloc_req structure exchanged for the allocation
 *
 * Marks the bitmap buffer, the descriptor buffer and the metadata file
 * dirty, then drops the buffer references held by @req.
 */
void nilfs_palloc_commit_alloc_entry(struct inode *inode,
				     struct nilfs_palloc_req *req)
{
	struct buffer_head *bitmap_bh = req->pr_bitmap_bh;
	struct buffer_head *desc_bh = req->pr_desc_bh;

	mark_buffer_dirty(bitmap_bh);
	mark_buffer_dirty(desc_bh);
	nilfs_mdt_mark_dirty(inode);

	brelse(bitmap_bh);
	brelse(desc_bh);
}
/**
* nilfs_palloc_commit_free_entry - finish deallocating a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the removal
*/
void nilfs_palloc_commit_free_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
struct nilfs_palloc_group_desc *desc;
unsigned long group, group_offset;
unsigned char *bitmap;
void *desc_kaddr, *bitmap_kaddr;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
desc_kaddr = kmap(req->pr_desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(inode, group,
req->pr_desc_bh, desc_kaddr);
bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
group_offset, bitmap))
printk(KERN_WARNING "%s: entry number %llu already freed\n",
__func__, (unsigned long long)req->pr_entry_nr);
else
nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
kunmap(req->pr_bitmap_bh->b_page);
kunmap(req->pr_desc_bh->b_page);
mark_buffer_dirty(req->pr_desc_bh);
mark_buffer_dirty(req->pr_bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(req->pr_bitmap_bh);
brelse(req->pr_desc_bh);
}
/**
* nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
*/
void nilfs_palloc_abort_alloc_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
struct nilfs_palloc_group_desc *desc;
void *desc_kaddr, *bitmap_kaddr;
unsigned char *bitmap;
unsigned long group, group_offset;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
desc_kaddr = kmap(req->pr_desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(inode, group,
req->pr_desc_bh, desc_kaddr);
bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
group_offset, bitmap))
printk(KERN_WARNING "%s: entry number %llu already freed\n",
__func__, (unsigned long long)req->pr_entry_nr);
else
nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
kunmap(req->pr_bitmap_bh->b_page);
kunmap(req->pr_desc_bh->b_page);
brelse(req->pr_bitmap_bh);
brelse(req->pr_desc_bh);
req->pr_entry_nr = 0;
req->pr_bitmap_bh = NULL;
req->pr_desc_bh = NULL;
}
/**
 * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
 * @inode: inode of metadata file using this allocator
 * @req: nilfs_palloc_req structure exchanged for the removal
 *
 * Gets the descriptor and bitmap blocks of the group containing
 * @req->pr_entry_nr and stores them in @req.
 *
 * Return: 0 on success or the negative error code returned while getting
 * one of the blocks; @req is not modified on failure.
 */
int nilfs_palloc_prepare_free_entry(struct inode *inode,
				    struct nilfs_palloc_req *req)
{
	struct buffer_head *desc_bh, *bitmap_bh;
	unsigned long group, offset;
	int err;

	group = nilfs_palloc_group(inode, req->pr_entry_nr, &offset);

	err = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
	if (err < 0)
		return err;

	err = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh);
	if (err < 0)
		goto put_desc;

	req->pr_desc_bh = desc_bh;
	req->pr_bitmap_bh = bitmap_bh;
	return 0;

put_desc:
	brelse(desc_bh);
	return err;
}
/**
 * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object
 * @inode: inode of metadata file using this allocator
 * @req: nilfs_palloc_req structure exchanged for the removal
 *
 * Drops the buffer references held by @req and resets its entry number and
 * both buffer pointers.
 */
void nilfs_palloc_abort_free_entry(struct inode *inode,
				   struct nilfs_palloc_req *req)
{
	brelse(req->pr_bitmap_bh);
	req->pr_bitmap_bh = NULL;

	brelse(req->pr_desc_bh);
	req->pr_desc_bh = NULL;

	req->pr_entry_nr = 0;
}
/**
 * nilfs_palloc_group_is_in - judge if an entry is in a group
 * @inode: inode of metadata file using this allocator
 * @group: group number
 * @nr: serial number of the entry (e.g. inode number)
 *
 * Return: 1 if @nr lies between the first and last entry number of @group
 * (both inclusive), 0 otherwise.
 */
static int
nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
{
	const unsigned long per_group = nilfs_palloc_entries_per_group(inode);
	const __u64 first = group * per_group;
	const __u64 last = first + per_group - 1;

	if (nr < first)
		return 0;
	return nr <= last;
}
/**
 * nilfs_palloc_freev - deallocate a set of persistent objects
 * @inode: inode of metadata file using this allocator
 * @entry_nrs: array of entry numbers to be deallocated
 * @nitems: number of entries stored in @entry_nrs
 *
 * Consecutive elements of @entry_nrs that belong to the same group are
 * handled together: their bitmap bits are cleared, the number of bits that
 * were actually set is added to the free count of the group, and the
 * descriptor buffer, bitmap buffer and metadata file are marked dirty.
 *
 * Return: 0 on success, or the negative error code returned while getting
 * a descriptor or bitmap block.  Groups processed before the failure are
 * not rolled back.
 */
int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
void *desc_kaddr, *bitmap_kaddr;
unsigned long group, group_offset;
int i, j, n, ret;
/* each pass handles one run of entries; the next run starts at index j */
for (i = 0; i < nitems; i = j) {
group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
/* the blocks are requested with the create flag cleared */
ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
if (ret < 0)
return ret;
ret = nilfs_palloc_get_bitmap_block(inode, group, 0,
&bitmap_bh);
if (ret < 0) {
brelse(desc_bh);
return ret;
}
desc_kaddr = kmap(desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(
inode, group, desc_bh, desc_kaddr);
bitmap_kaddr = kmap(bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
/* n counts the bits that were set and have now been cleared */
for (j = i, n = 0;
(j < nitems) && nilfs_palloc_group_is_in(inode, group,
entry_nrs[j]);
j++) {
nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
if (!nilfs_clear_bit_atomic(
nilfs_mdt_bgl_lock(inode, group),
group_offset, bitmap)) {
printk(KERN_WARNING
"%s: entry number %llu already freed\n",
__func__,
(unsigned long long)entry_nrs[j]);
} else {
n++;
}
}
nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
kunmap(bitmap_bh->b_page);
kunmap(desc_bh->b_page);
mark_buffer_dirty(desc_bh);
mark_buffer_dirty(bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(bitmap_bh);
brelse(desc_bh);
}
return 0;
}
void nilfs_palloc_setup_cache(struct inode *inode,
struct nilfs_palloc_cache *cache)
{
NILFS_MDT(inode)->mi_palloc_cache = cache;
spin_lock_init(&cache->lock);
}
/*
 * nilfs_palloc_clear_cache - release the descriptor, bitmap and entry
 * buffers cached for @inode and reset the cached pointers to NULL, all
 * under the cache lock.
 */
void nilfs_palloc_clear_cache(struct inode *inode)
{
	struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;

	spin_lock(&cache->lock);

	brelse(cache->prev_desc.bh);
	cache->prev_desc.bh = NULL;

	brelse(cache->prev_bitmap.bh);
	cache->prev_bitmap.bh = NULL;

	brelse(cache->prev_entry.bh);
	cache->prev_entry.bh = NULL;

	spin_unlock(&cache->lock);
}
void nilfs_palloc_destroy_cache(struct inode *inode)
{
nilfs_palloc_clear_cache(inode);
NILFS_MDT(inode)->mi_palloc_cache = NULL;
}

110
fs/nilfs2/alloc.h Normal file
View file

@ -0,0 +1,110 @@
/*
* alloc.h - persistent object (dat entry/disk inode) allocator/deallocator
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Original code was written by Koji Sato <koji@osrg.net>.
* Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
* Amagai Yoshiji <amagai@osrg.net>.
*/
#ifndef _NILFS_ALLOC_H
#define _NILFS_ALLOC_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
/**
 * nilfs_palloc_entries_per_group - get the number of entries per group
 * @inode: inode of metadata file using this allocator
 *
 * A group has as many entries as one bitmap block has bits, i.e. the block
 * size in bytes multiplied by eight.
 */
static inline unsigned long
nilfs_palloc_entries_per_group(const struct inode *inode)
{
	/* log2(block size) + log2(8 bits per byte) */
	const unsigned int bits_shift = inode->i_blkbits + 3;

	return 1UL << bits_shift;
}
/*
 * Allocator initialization, entry block lookup, entry address calculation
 * and the maximum entry count query.  The definitions are in alloc.c.
 */
int nilfs_palloc_init_blockgroup(struct inode *, unsigned);
int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
struct buffer_head **);
void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
const struct buffer_head *, void *);
int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *);
/**
 * struct nilfs_palloc_req - persistent allocator request and reply
 * @pr_entry_nr: entry number (vblocknr or inode number)
 * @pr_desc_bh: buffer head of the buffer containing block group descriptors
 * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap
 * @pr_entry_bh: buffer head of the buffer containing translation entries
 */
struct nilfs_palloc_req {
__u64 pr_entry_nr;
struct buffer_head *pr_desc_bh;
struct buffer_head *pr_bitmap_bh;
struct buffer_head *pr_entry_bh;
};
/*
 * Allocation and deallocation of a single entry are each split into a
 * prepare step followed by either a commit or an abort step; all of them
 * exchange their state through a struct nilfs_palloc_req.
 * nilfs_palloc_freev() deallocates an array of entries in one call.
 */
int nilfs_palloc_prepare_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_commit_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
void nilfs_palloc_commit_free_entry(struct inode *, struct nilfs_palloc_req *);
int nilfs_palloc_prepare_free_entry(struct inode *, struct nilfs_palloc_req *);
void nilfs_palloc_abort_free_entry(struct inode *, struct nilfs_palloc_req *);
int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
/*
 * Bitmap helpers used by the allocator.  They are aliases for the ext2
 * atomic set/clear bit operations and the little-endian zero-bit search.
 */
#define nilfs_set_bit_atomic ext2_set_bit_atomic
#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
#define nilfs_find_next_zero_bit find_next_zero_bit_le
/**
 * struct nilfs_bh_assoc - block offset and buffer head association
 * @blkoff: block offset that @bh was obtained for
 * @bh: buffer head associated with @blkoff
 */
struct nilfs_bh_assoc {
unsigned long blkoff;
struct buffer_head *bh;
};
/**
 * struct nilfs_palloc_cache - persistent object allocator cache
 * @lock: cache protecting lock
 * @prev_desc: blockgroup descriptors cache
 * @prev_bitmap: blockgroup bitmap cache
 * @prev_entry: translation entries cache
 *
 * Each of the three members caches one block offset / buffer head pair.
 */
struct nilfs_palloc_cache {
spinlock_t lock;
struct nilfs_bh_assoc prev_desc;
struct nilfs_bh_assoc prev_bitmap;
struct nilfs_bh_assoc prev_entry;
};
/* Attach, clear and detach the allocator cache of a metadata file. */
void nilfs_palloc_setup_cache(struct inode *inode,
struct nilfs_palloc_cache *cache);
void nilfs_palloc_clear_cache(struct inode *inode);
void nilfs_palloc_destroy_cache(struct inode *inode);
#endif /* _NILFS_ALLOC_H */

567
fs/nilfs2/bmap.c Normal file
View file

@ -0,0 +1,567 @@
/*
* bmap.c - NILFS block mapping.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/errno.h>
#include "nilfs.h"
#include "bmap.h"
#include "btree.h"
#include "direct.h"
#include "btnode.h"
#include "mdt.h"
#include "dat.h"
#include "alloc.h"
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
{
struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
return nilfs->ns_dat;
}
/*
 * nilfs_bmap_convert_error - translate an internal bmap error code
 *
 * -EINVAL is reported through nilfs_error() as a broken bmap, naming
 * @fname and the inode number, and is converted to -EIO.  Every other
 * value of @err is returned unchanged.
 */
static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
				    const char *fname, int err)
{
	struct inode *inode = bmap->b_inode;

	if (err != -EINVAL)
		return err;

	nilfs_error(inode->i_sb, fname,
		    "broken bmap (inode number=%lu)\n", inode->i_ino);
	return -EIO;
}
/**
 * nilfs_bmap_lookup_at_level - find a data block or node block
 * @bmap: bmap
 * @key: key
 * @level: level
 * @ptrp: place to store the value associated to @key
 *
 * Description: nilfs_bmap_lookup_at_level() looks up the record matching
 * @key in the block at @level of the bmap while holding the bmap semaphore
 * for reading.  If the bmap uses virtual block numbers, the value found is
 * translated through the DAT before it is stored in @ptrp.
 *
 * Return Value: On success, 0 is returned and the record associated with @key
 * is stored in the place pointed by @ptrp. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - A record associated with @key does not exist.
 */
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
			       __u64 *ptrp)
{
	sector_t blocknr;
	int ret;

	down_read(&bmap->b_sem);

	ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
	if (ret < 0) {
		ret = nilfs_bmap_convert_error(bmap, __func__, ret);
	} else if (NILFS_BMAP_USE_VBN(bmap)) {
		ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
					  &blocknr);
		if (ret == 0)
			*ptrp = blocknr;
	}

	up_read(&bmap->b_sem);
	return ret;
}
/*
 * nilfs_bmap_lookup_contig - call the bmap's bop_lookup_contig operation
 * for @key with a limit of @maxblocks while holding the bmap semaphore for
 * reading.  The result is passed through nilfs_bmap_convert_error() before
 * it is returned.
 */
int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
			     unsigned maxblocks)
{
	int result;

	down_read(&bmap->b_sem);
	result = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
	up_read(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, result);
}
/*
 * nilfs_bmap_do_insert - insert a key/pointer pair without taking b_sem
 *
 * When the bmap provides bop_check_insert and it returns a positive value,
 * the existing pairs are gathered and handed, together with the new pair,
 * to nilfs_btree_convert_and_insert(); on success the bmap is flagged
 * NILFS_BMAP_LARGE.  A negative check result is returned as is.  In all
 * other cases the pair is inserted with bop_insert.
 */
static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
{
	__u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
	__u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1];
	int ret, n;

	if (bmap->b_ops->bop_check_insert != NULL) {
		ret = bmap->b_ops->bop_check_insert(bmap, key);
		if (ret < 0)
			return ret;

		if (ret > 0) {
			n = bmap->b_ops->bop_gather_data(
				bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1);
			if (n < 0)
				return n;

			ret = nilfs_btree_convert_and_insert(
				bmap, key, ptr, keys, ptrs, n);
			if (ret == 0)
				bmap->b_u.u_flags |= NILFS_BMAP_LARGE;
			return ret;
		}
	}

	return bmap->b_ops->bop_insert(bmap, key, ptr);
}
/**
 * nilfs_bmap_insert - insert a new key-record pair into a bmap
 * @bmap: bmap
 * @key: key
 * @rec: record
 *
 * Description: nilfs_bmap_insert() inserts the pair given by @key and @rec
 * into @bmap while holding the bmap semaphore for writing.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EEXIST - A record associated with @key already exist.
 */
int nilfs_bmap_insert(struct nilfs_bmap *bmap,
		      unsigned long key,
		      unsigned long rec)
{
	int err;

	down_write(&bmap->b_sem);
	err = nilfs_bmap_do_insert(bmap, key, rec);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/*
 * nilfs_bmap_do_delete - delete the pair for @key without taking b_sem
 *
 * When the bmap provides bop_check_delete and it returns a positive value,
 * the existing pairs are gathered and passed to
 * nilfs_direct_delete_and_convert(); on success the NILFS_BMAP_LARGE flag
 * is cleared.  A negative check result is returned as is.  In all other
 * cases the pair is removed with bop_delete.
 */
static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
{
	__u64 keys[NILFS_BMAP_LARGE_LOW + 1];
	__u64 ptrs[NILFS_BMAP_LARGE_LOW + 1];
	int ret, n;

	if (bmap->b_ops->bop_check_delete != NULL) {
		ret = bmap->b_ops->bop_check_delete(bmap, key);
		if (ret < 0)
			return ret;

		if (ret > 0) {
			n = bmap->b_ops->bop_gather_data(
				bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1);
			if (n < 0)
				return n;

			ret = nilfs_direct_delete_and_convert(
				bmap, key, keys, ptrs, n);
			if (ret == 0)
				bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE;
			return ret;
		}
	}

	return bmap->b_ops->bop_delete(bmap, key);
}
/*
 * nilfs_bmap_last_key - fetch the last key reported by the bmap backend
 * @bmap: bmap
 * @key: place to store the key on success
 *
 * Queries ->bop_last_key() under the read lock.  A negative result is
 * passed through nilfs_bmap_convert_error() and *@key is left untouched;
 * otherwise the key is stored in *@key and the backend's result returned.
 */
int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
{
	__u64 last;
	int err;

	down_read(&bmap->b_sem);
	err = bmap->b_ops->bop_last_key(bmap, &last);
	up_read(&bmap->b_sem);

	if (err < 0)
		return nilfs_bmap_convert_error(bmap, __func__, err);

	*key = last;
	return err;
}
/**
 * nilfs_bmap_delete - delete a key-record pair from a bmap
 * @bmap: bmap
 * @key: key
 *
 * Description: nilfs_bmap_delete() removes the pair identified by @key
 * from @bmap.  The deletion runs with @bmap->b_sem held for writing.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - A record associated with @key does not exist.
 */
int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
{
	int err;

	down_write(&bmap->b_sem);
	err = nilfs_bmap_do_delete(bmap, key);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/*
 * nilfs_bmap_do_truncate - delete entries from the tail down to @key
 *
 * Repeatedly asks the backend for its last key and deletes that entry as
 * long as the last key is not smaller than @key.  -ENOENT from
 * ->bop_last_key() ends the loop successfully; any other negative result,
 * from the lookup or from the deletion, is returned unchanged.
 * The caller holds b_sem.
 */
static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
{
	__u64 last;
	int err;

	for (;;) {
		err = bmap->b_ops->bop_last_key(bmap, &last);
		if (err < 0)
			return err == -ENOENT ? 0 : err;

		if (key > last)
			return 0;

		err = nilfs_bmap_do_delete(bmap, last);
		if (err < 0)
			return err;
	}
}
/**
 * nilfs_bmap_truncate - truncate a bmap to a specified key
 * @bmap: bmap
 * @key: key
 *
 * Description: nilfs_bmap_truncate() removes every key-record pair whose
 * key is greater than or equal to @key from @bmap.  The work is done with
 * @bmap->b_sem held for writing.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
{
	int err;

	down_write(&bmap->b_sem);
	err = nilfs_bmap_do_truncate(bmap, key);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/**
 * nilfs_bmap_clear - free resources a bmap holds
 * @bmap: bmap
 *
 * Description: nilfs_bmap_clear() invokes the backend's optional
 * ->bop_clear() hook with @bmap->b_sem held for writing.  Nothing is
 * called when the backend does not provide the hook.
 */
void nilfs_bmap_clear(struct nilfs_bmap *bmap)
{
	void (*clear_op)(struct nilfs_bmap *);

	down_write(&bmap->b_sem);
	clear_op = bmap->b_ops->bop_clear;
	if (clear_op != NULL)
		clear_op(bmap);
	up_write(&bmap->b_sem);
}
/**
 * nilfs_bmap_propagate - propagate dirty state
 * @bmap: bmap
 * @bh: buffer head
 *
 * Description: nilfs_bmap_propagate() marks dirty the buffers that refer,
 * directly or indirectly, to the block given by @bh.  The backend's
 * ->bop_propagate() is called with @bmap->b_sem held for writing.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
{
	int err;

	down_write(&bmap->b_sem);
	err = bmap->b_ops->bop_propagate(bmap, bh);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/**
 * nilfs_bmap_lookup_dirty_buffers - hand a list to the backend's dirty-buffer lookup
 * @bmap: bmap
 * @listp: pointer to buffer head list
 *
 * Calls the optional ->bop_lookup_dirty_buffers() hook with @listp.
 * Does nothing when the backend lacks the hook.  b_sem is not taken here.
 */
void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
				     struct list_head *listp)
{
	void (*lookup_op)(struct nilfs_bmap *, struct list_head *);

	lookup_op = bmap->b_ops->bop_lookup_dirty_buffers;
	if (lookup_op == NULL)
		return;

	lookup_op(bmap, listp);
}
/**
 * nilfs_bmap_assign - assign a new block number to a block
 * @bmap: bmap
 * @bh: pointer to buffer head pointer
 * @blocknr: block number
 * @binfo: block information
 *
 * Description: nilfs_bmap_assign() assigns the block number @blocknr to the
 * buffer referenced through @bh.  The backend's ->bop_assign() is called
 * with @bmap->b_sem held for writing.
 *
 * Return Value: On success, 0 is returned and the buffer head of a newly
 * created buffer and the block information associated with the buffer are
 * stored in the places pointed to by @bh and @binfo, respectively. On
 * error, one of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_bmap_assign(struct nilfs_bmap *bmap,
		      struct buffer_head **bh,
		      unsigned long blocknr,
		      union nilfs_binfo *binfo)
{
	int err;

	down_write(&bmap->b_sem);
	err = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/**
 * nilfs_bmap_mark - mark block dirty
 * @bmap: bmap
 * @key: key
 * @level: level
 *
 * Description: nilfs_bmap_mark() marks the block identified by @key and
 * @level as dirty.  When the backend has no ->bop_mark() hook the call
 * succeeds without taking the lock.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
{
	int err;

	if (!bmap->b_ops->bop_mark)
		return 0;

	down_write(&bmap->b_sem);
	/* the hook is looked up again under the lock, as before */
	err = bmap->b_ops->bop_mark(bmap, key, level);
	up_write(&bmap->b_sem);

	return nilfs_bmap_convert_error(bmap, __func__, err);
}
/**
 * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state
 * @bmap: bmap
 *
 * Description: nilfs_bmap_test_and_clear_dirty() reads the dirty state of
 * @bmap and clears it, both under @bmap->b_sem held for writing.
 *
 * Return Value: 1 is returned if @bmap was dirty, or 0 if it was clean.
 */
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
{
	int was_dirty;

	down_write(&bmap->b_sem);
	was_dirty = nilfs_bmap_dirty(bmap);
	nilfs_bmap_clear_dirty(bmap);
	up_write(&bmap->b_sem);

	return was_dirty;
}
/*
* Internal use only
*/
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
struct buffer_head *pbh;
__u64 key;
key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
bmap->b_inode->i_blkbits);
for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
key++;
return key;
}
/*
 * nilfs_bmap_find_target_seq - derive a target pointer from the last allocation
 * @bmap: bmap
 * @key: key for which a target is wanted
 *
 * Returns the last allocated pointer shifted by the distance between @key
 * and the last allocated key, provided that distance is smaller than
 * NILFS_INODE_BMAP_SIZE, a last allocated pointer is recorded, and the
 * shifted value passes the "> 0" test.  Otherwise
 * NILFS_BMAP_INVALID_PTR is returned.
 */
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
{
	__s64 delta = key - bmap->b_last_allocated_key;

	if (!(nilfs_bmap_keydiff_abs(delta) < NILFS_INODE_BMAP_SIZE))
		return NILFS_BMAP_INVALID_PTR;

	if (bmap->b_last_allocated_ptr == NILFS_BMAP_INVALID_PTR)
		return NILFS_BMAP_INVALID_PTR;

	if (!(bmap->b_last_allocated_ptr + delta > 0))
		return NILFS_BMAP_INVALID_PTR;

	return bmap->b_last_allocated_ptr + delta;
}
/* number of sub-ranges each allocation group is divided into */
#define NILFS_BMAP_GROUP_DIV	8

/*
 * nilfs_bmap_find_target_in_group - pick a target derived from the inode number
 * @bmap: bmap
 *
 * Takes the start of the group that the owner's inode number falls into
 * (group size from nilfs_palloc_entries_per_group() on the DAT inode) and
 * adds an offset of (ino % NILFS_BMAP_GROUP_DIV) sub-ranges, each
 * 1/NILFS_BMAP_GROUP_DIV of a group long.
 */
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
{
	struct inode *dat = nilfs_bmap_get_dat(bmap);
	unsigned long per_group = nilfs_palloc_entries_per_group(dat);
	unsigned long ino = bmap->b_inode->i_ino;
	unsigned long group_start = (ino / per_group) * per_group;
	unsigned long slot = ino % NILFS_BMAP_GROUP_DIV;

	return group_start + slot * (per_group / NILFS_BMAP_GROUP_DIV);
}
/*
 * Lockdep classes for bmap->b_sem.  nilfs_bmap_read() assigns the "dat"
 * class to the bmap of the DAT inode and the "mdt" class to the bmaps of
 * the cpfile, sufile and ifile inodes.
 */
static struct lock_class_key nilfs_bmap_dat_lock_key;
static struct lock_class_key nilfs_bmap_mdt_lock_key;
/**
* nilfs_bmap_read - read a bmap from an inode
* @bmap: bmap
* @raw_inode: on-disk inode
*
* Description: nilfs_bmap_read() initializes the bmap @bmap.
*
* Return Value: On success, 0 is returned. On error, the following negative
* error code is returned.
*
* %-ENOMEM - Insufficient amount of memory available.
*/
int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
if (raw_inode == NULL)
memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE);
else
memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE);
init_rwsem(&bmap->b_sem);
bmap->b_state = 0;
bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
switch (bmap->b_inode->i_ino) {
case NILFS_DAT_INO:
bmap->b_ptr_type = NILFS_BMAP_PTR_P;
bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
break;
case NILFS_CPFILE_INO:
case NILFS_SUFILE_INO:
bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
break;
case NILFS_IFILE_INO:
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
/* Fall through */
default:
bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
break;
}
return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ?
nilfs_btree_init(bmap) : nilfs_direct_init(bmap);
}
/**
* nilfs_bmap_write - write back a bmap to an inode
* @bmap: bmap
* @raw_inode: on-disk inode
*
* Description: nilfs_bmap_write() stores @bmap in @raw_inode.
*/
void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
down_write(&bmap->b_sem);
memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
NILFS_INODE_BMAP_SIZE * sizeof(__le64));
if (bmap->b_inode->i_ino == NILFS_DAT_INO)
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
up_write(&bmap->b_sem);
}
/*
 * nilfs_bmap_init_gc - set up a bmap for a GC inode
 * @bmap: bmap
 *
 * Zeroes the raw mapping, resets the semaphore, state and allocation
 * hints, selects pointer type NILFS_BMAP_PTR_U and then lets
 * nilfs_btree_init_gc() finish the initialization.
 */
void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
{
	memset(&bmap->b_u, 0, NILFS_BMAP_SIZE);
	init_rwsem(&bmap->b_sem);

	bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
	bmap->b_state = 0;
	bmap->b_ptr_type = NILFS_BMAP_PTR_U;
	bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
	bmap->b_last_allocated_key = 0;

	nilfs_btree_init_gc(bmap);
}
/*
 * nilfs_bmap_save - copy the restorable part of a bmap into @store
 * @bmap: source bmap
 * @store: destination shadow copy
 *
 * Saves the raw mapping, the allocation hints and the state word.
 * No locking is done here.
 */
void nilfs_bmap_save(const struct nilfs_bmap *bmap,
		     struct nilfs_bmap_store *store)
{
	store->state = bmap->b_state;
	store->last_allocated_ptr = bmap->b_last_allocated_ptr;
	store->last_allocated_key = bmap->b_last_allocated_key;
	memcpy(store->data, bmap->b_u.u_data, sizeof(store->data));
}
/*
 * nilfs_bmap_restore - load a bmap back from a shadow copy
 * @bmap: destination bmap
 * @store: shadow copy filled by nilfs_bmap_save()
 *
 * Restores the raw mapping, the allocation hints and the state word.
 * No locking is done here.
 */
void nilfs_bmap_restore(struct nilfs_bmap *bmap,
			const struct nilfs_bmap_store *store)
{
	bmap->b_state = store->state;
	bmap->b_last_allocated_ptr = store->last_allocated_ptr;
	bmap->b_last_allocated_key = store->last_allocated_key;
	memcpy(bmap->b_u.u_data, store->data, sizeof(store->data));
}

277
fs/nilfs2/bmap.h Normal file
View file

@ -0,0 +1,277 @@
/*
* bmap.h - NILFS block mapping.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_BMAP_H
#define _NILFS_BMAP_H
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
#include "alloc.h"
#include "dat.h"
#define NILFS_BMAP_INVALID_PTR 0
#define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff))
struct nilfs_bmap;
/**
 * union nilfs_bmap_ptr_req - request for bmap ptr
 * @bpr_ptr: bmap pointer
 * @bpr_req: request for persistent allocator
 *
 * The inline helpers in this header pass @bpr_req to the nilfs_dat_*()
 * routines when a DAT inode is supplied; without one,
 * nilfs_bmap_prepare_alloc_ptr() fills in @bpr_ptr directly.
 */
union nilfs_bmap_ptr_req {
__u64 bpr_ptr;
struct nilfs_palloc_req bpr_req;
};
/**
 * struct nilfs_bmap_stats - bmap statistics
 * @bs_nblocks: number of blocks created or deleted
 *
 * NOTE(review): no user of this structure is visible in this header;
 * confirm where @bs_nblocks is accumulated.
 */
struct nilfs_bmap_stats {
unsigned int bs_nblocks;
};
/**
 * struct nilfs_bmap_operations - bmap operation table
 *
 * Per-backend method table reached through nilfs_bmap.b_ops.  The
 * wrappers in bmap.c treat bop_clear, bop_lookup_dirty_buffers, bop_mark,
 * bop_check_insert and bop_check_delete as optional (they are tested
 * against NULL before use); the remaining hooks are called
 * unconditionally.
 */
struct nilfs_bmap_operations {
/* lookup of a pointer by key and level */
int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
/* lookup bounded by a maximum block count */
int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *,
unsigned);
/* insert a key/pointer pair */
int (*bop_insert)(struct nilfs_bmap *, __u64, __u64);
/* delete the entry for a key */
int (*bop_delete)(struct nilfs_bmap *, __u64);
/* optional: called from nilfs_bmap_clear() */
void (*bop_clear)(struct nilfs_bmap *);
/* called from nilfs_bmap_propagate() */
int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *);
/* optional: called from nilfs_bmap_lookup_dirty_buffers() */
void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *,
struct list_head *);
/* called from nilfs_bmap_assign() */
int (*bop_assign)(struct nilfs_bmap *,
struct buffer_head **,
sector_t,
union nilfs_binfo *);
/* optional: called from nilfs_bmap_mark() */
int (*bop_mark)(struct nilfs_bmap *, __u64, int);
/* The following functions are internal use only. */
/* stores the last key; -ENOENT is treated as "empty" by truncate */
int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
/* optional: a positive result makes insert convert to a B-tree */
int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
/* optional: a positive result makes delete convert to direct mapping */
int (*bop_check_delete)(struct nilfs_bmap *, __u64);
/* fills key/pointer arrays; returns the entry count or a negative error */
int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
};
/* size in bytes of the raw mapping stored in the on-disk inode */
#define NILFS_BMAP_SIZE	(NILFS_INODE_BMAP_SIZE * sizeof(__le64))
/* number of bits in an unsigned long */
#define NILFS_BMAP_KEY_BIT	(sizeof(unsigned long) * 8 /* CHAR_BIT */)
/* pointer value with only the most significant bit of an unsigned long set */
#define NILFS_BMAP_NEW_PTR_INIT	(1UL << (NILFS_BMAP_KEY_BIT - 1))

/* Returns 1 if @ptr has the NILFS_BMAP_NEW_PTR_INIT bit set, 0 otherwise. */
static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
{
	return (ptr & NILFS_BMAP_NEW_PTR_INIT) != 0;
}
/**
 * struct nilfs_bmap - bmap structure
 * @b_u: raw data
 * @b_sem: semaphore
 * @b_inode: owner of bmap
 * @b_ops: bmap operation table
 * @b_last_allocated_key: last allocated key for data block
 * @b_last_allocated_ptr: last allocated ptr for data block
 * @b_ptr_type: pointer type
 * @b_state: state
 * @b_nchildren_per_block: maximum number of child nodes for non-root nodes
 *
 * @b_u is a union: u_flags overlays the first byte of u_data, and the
 * NILFS_BMAP_LARGE bit is tested and updated through it.
 */
struct nilfs_bmap {
union {
__u8 u_flags;
__le64 u_data[NILFS_BMAP_SIZE / sizeof(__le64)];
} b_u;
struct rw_semaphore b_sem;
struct inode *b_inode;
const struct nilfs_bmap_operations *b_ops;
__u64 b_last_allocated_key;
__u64 b_last_allocated_ptr;
int b_ptr_type;
int b_state;
__u16 b_nchildren_per_block;
};
/* pointer type */
#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */
#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single
version) */
#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple
versions) */
#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */
/* true for the two virtual-block-number types (values greater than 0) */
#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0)
/* state */
/* bit in nilfs_bmap.b_state used by nilfs_bmap_dirty()/set_dirty()/clear_dirty() */
#define NILFS_BMAP_DIRTY 0x00000001
/**
 * struct nilfs_bmap_store - shadow copy of bmap state
 * @data: cached raw block mapping of on-disk inode
 * @last_allocated_key: cached value of last allocated key for data block
 * @last_allocated_ptr: cached value of last allocated ptr for data block
 * @state: cached value of state field of bmap structure
 *
 * Filled by nilfs_bmap_save() and consumed by nilfs_bmap_restore().
 */
struct nilfs_bmap_store {
__le64 data[NILFS_BMAP_SIZE / sizeof(__le64)];
__u64 last_allocated_key;
__u64 last_allocated_ptr;
int state;
};
/*
 * Public bmap interface, implemented in bmap.c.  Apart from
 * nilfs_bmap_lookup_dirty_buffers(), nilfs_bmap_save() and
 * nilfs_bmap_restore(), the implementations visible there take
 * bmap->b_sem themselves or initialize it.
 */
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long);
void nilfs_bmap_clear(struct nilfs_bmap *);
int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
int nilfs_bmap_assign(struct nilfs_bmap *, struct buffer_head **,
unsigned long, union nilfs_binfo *);
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *);
int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int);
void nilfs_bmap_init_gc(struct nilfs_bmap *);
void nilfs_bmap_save(const struct nilfs_bmap *, struct nilfs_bmap_store *);
void nilfs_bmap_restore(struct nilfs_bmap *, const struct nilfs_bmap_store *);
/*
 * nilfs_bmap_lookup - look up the pointer for @key at level 1
 *
 * Shorthand for nilfs_bmap_lookup_at_level() with the level fixed to 1;
 * the result is stored in *@ptr and the callee's return value is passed on.
 */
static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key,
__u64 *ptr)
{
return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr);
}
/*
 * Internal use only
 */
/* Returns the DAT inode used for @bmap (result is passed to the nilfs_dat_*() and palloc helpers). */
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
/*
 * nilfs_bmap_prepare_alloc_ptr - first phase of allocating a pointer
 *
 * With a DAT inode the request is delegated to nilfs_dat_prepare_alloc().
 * Without one, the current b_last_allocated_ptr is handed out through
 * @req->bpr_ptr and the counter is advanced; that path always returns 0.
 */
static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
					       union nilfs_bmap_ptr_req *req,
					       struct inode *dat)
{
	if (!dat) {
		/* ignore target ptr */
		req->bpr_ptr = bmap->b_last_allocated_ptr;
		bmap->b_last_allocated_ptr++;
		return 0;
	}

	return nilfs_dat_prepare_alloc(dat, &req->bpr_req);
}
/*
 * nilfs_bmap_commit_alloc_ptr - commit a prepared pointer allocation
 *
 * Delegates to nilfs_dat_commit_alloc() when a DAT inode is given;
 * otherwise there is nothing to do.
 */
static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
					       union nilfs_bmap_ptr_req *req,
					       struct inode *dat)
{
	if (!dat)
		return;

	nilfs_dat_commit_alloc(dat, &req->bpr_req);
}
/*
 * nilfs_bmap_abort_alloc_ptr - undo a prepared pointer allocation
 *
 * Delegates to nilfs_dat_abort_alloc() when a DAT inode is given;
 * otherwise steps b_last_allocated_ptr back by one, reversing the
 * increment done in nilfs_bmap_prepare_alloc_ptr().
 */
static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
					      union nilfs_bmap_ptr_req *req,
					      struct inode *dat)
{
	if (!dat) {
		bmap->b_last_allocated_ptr--;
		return;
	}

	nilfs_dat_abort_alloc(dat, &req->bpr_req);
}
/*
 * nilfs_bmap_prepare_end_ptr - first phase of ending the use of a pointer
 *
 * Returns the result of nilfs_dat_prepare_end() when a DAT inode is
 * given, and 0 otherwise.
 */
static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
					     union nilfs_bmap_ptr_req *req,
					     struct inode *dat)
{
	if (!dat)
		return 0;

	return nilfs_dat_prepare_end(dat, &req->bpr_req);
}
/*
 * nilfs_bmap_commit_end_ptr - commit ending the use of a pointer
 *
 * Delegates to nilfs_dat_commit_end() when a DAT inode is given.  The
 * last argument tells the DAT layer whether this bmap uses the
 * single-version pointer type (NILFS_BMAP_PTR_VS).
 */
static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
					     union nilfs_bmap_ptr_req *req,
					     struct inode *dat)
{
	if (!dat)
		return;

	nilfs_dat_commit_end(dat, &req->bpr_req,
			     bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
}
/*
 * nilfs_bmap_abort_end_ptr - cancel a prepared end-of-use request
 *
 * Delegates to nilfs_dat_abort_end() when a DAT inode is given;
 * otherwise there is nothing to undo.
 */
static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
					    union nilfs_bmap_ptr_req *req,
					    struct inode *dat)
{
	if (!dat)
		return;

	nilfs_dat_abort_end(dat, &req->bpr_req);
}
/*
 * nilfs_bmap_set_target_v - record the most recent allocation
 *
 * Stores @key and @ptr as the allocation hint later consulted by
 * nilfs_bmap_find_target_seq().
 */
static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key,
					   __u64 ptr)
{
	bmap->b_last_allocated_ptr = ptr;
	bmap->b_last_allocated_key = key;
}
/* Key and allocation-target helpers implemented in bmap.c. */
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
const struct buffer_head *);
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
/*
 * Returns 1 if NILFS_BMAP_DIRTY is set in @bmap->b_state, 0 otherwise.
 * Assume that bmap semaphore is locked.
 */
static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
{
	return (bmap->b_state & NILFS_BMAP_DIRTY) != 0;
}
/*
 * Sets NILFS_BMAP_DIRTY in @bmap->b_state.
 * Assume that bmap semaphore is locked.
 */
static inline void nilfs_bmap_set_dirty(struct nilfs_bmap *bmap)
{
	bmap->b_state = bmap->b_state | NILFS_BMAP_DIRTY;
}
/*
 * Clears NILFS_BMAP_DIRTY in @bmap->b_state.
 * Assume that bmap semaphore is locked.
 */
static inline void nilfs_bmap_clear_dirty(struct nilfs_bmap *bmap)
{
	bmap->b_state = bmap->b_state & ~NILFS_BMAP_DIRTY;
}
#define NILFS_BMAP_LARGE 0x1
#define NILFS_BMAP_SMALL_LOW NILFS_DIRECT_KEY_MIN
#define NILFS_BMAP_SMALL_HIGH NILFS_DIRECT_KEY_MAX
#define NILFS_BMAP_LARGE_LOW NILFS_BTREE_ROOT_NCHILDREN_MAX
#define NILFS_BMAP_LARGE_HIGH NILFS_BTREE_KEY_MAX
#endif /* _NILFS_BMAP_H */

297
fs/nilfs2/btnode.c Normal file
View file

@ -0,0 +1,297 @@
/*
* btnode.c - NILFS B-tree node cache
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* This file was originally written by Seiji Kihara <kihara@osrg.net>
* and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
* stabilization and simplification.
*
*/
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "mdt.h"
#include "dat.h"
#include "page.h"
#include "btnode.h"
/*
 * nilfs_btnode_cache_clear - drop all pages of a B-tree node cache
 * @btnc: address space of the B-tree node cache
 *
 * Invalidates the whole page range of @btnc and then truncates the
 * mapping from offset 0.
 */
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
invalidate_mapping_pages(btnc, 0, -1);
truncate_inode_pages(btnc, 0);
}
/*
 * nilfs_btnode_create_block - set up a fresh, zeroed B-tree node buffer
 * @btnc: B-tree node cache
 * @blocknr: block number to use as the buffer's address
 *
 * Grabs the buffer for @blocknr, zero-fills it and marks it mapped and
 * up to date.  The page lock and page reference taken by the grab are
 * dropped before returning; the buffer reference is kept for the caller.
 *
 * Returns the buffer head, or NULL if no buffer could be grabbed.
 * Triggers BUG() if the grabbed buffer is already mapped, up to date or
 * dirty.
 */
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
	struct inode *host = NILFS_BTNC_I(btnc);
	struct buffer_head *node_bh;
	int in_use;

	node_bh = nilfs_grab_buffer(host, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!node_bh))
		return NULL;

	in_use = buffer_mapped(node_bh) || buffer_uptodate(node_bh) ||
		 buffer_dirty(node_bh);
	if (unlikely(in_use)) {
		brelse(node_bh);
		BUG();
	}

	memset(node_bh->b_data, 0, 1 << host->i_blkbits);
	node_bh->b_bdev = host->i_sb->s_bdev;
	node_bh->b_blocknr = blocknr;
	set_buffer_mapped(node_bh);
	set_buffer_uptodate(node_bh);

	unlock_page(node_bh->b_page);
	page_cache_release(node_bh->b_page);
	return node_bh;
}
/*
 * nilfs_btnode_submit_block - start reading a B-tree node block
 * @btnc: B-tree node cache
 * @blocknr: block number used as the buffer's address in the cache
 * @pblocknr: disk block to read from, or 0 to derive it from @blocknr
 * @mode: READ or READA
 * @pbh: place to store the buffer head
 * @submit_ptr: in/out: disk block number of the previous submission
 *
 * Return values as produced by the code below:
 *   0        - a read was submitted; *@pbh is set, *@submit_ptr updated.
 *   -EEXIST  - the buffer was already up to date or dirty; *@pbh is set
 *              and no I/O is issued (internal code).
 *   -EBUSY   - READA only: the block does not directly follow
 *              *@submit_ptr or the buffer lock is contended; the buffer
 *              is released and *@pbh is not written (internal code).
 *   -ENOMEM  - the buffer could not be grabbed.
 *   other    - error from nilfs_dat_translate(); the buffer is released
 *              and *@pbh is not written.
 */
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
sector_t pblocknr, int mode,
struct buffer_head **pbh, sector_t *submit_ptr)
{
struct buffer_head *bh;
struct inode *inode = NILFS_BTNC_I(btnc);
struct page *page;
int err;
bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
if (unlikely(!bh))
return -ENOMEM;
err = -EEXIST; /* internal code */
/* remember the page now: the exit path unlocks and releases it */
page = bh->b_page;
if (buffer_uptodate(bh) || buffer_dirty(bh))
goto found;
if (pblocknr == 0) {
/* no disk address supplied: use blocknr, translated unless this is the DAT inode */
pblocknr = blocknr;
if (inode->i_ino != NILFS_DAT_INO) {
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
/* blocknr is a virtual block number */
err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
&pblocknr);
if (unlikely(err)) {
brelse(bh);
goto out_locked;
}
}
}
if (mode == READA) {
/* read-ahead is attempted only for the block right after the last submission and never blocks on the buffer lock */
if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
err = -EBUSY; /* internal code */
brelse(bh);
goto out_locked;
}
} else { /* mode == READ */
lock_buffer(bh);
}
/* recheck under the buffer lock */
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
err = -EEXIST; /* internal code */
goto found;
}
set_buffer_mapped(bh);
bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
/* extra buffer reference taken before handing the buffer to submit_bh() */
get_bh(bh);
submit_bh(mode, bh);
bh->b_blocknr = blocknr; /* set back to the given block address */
*submit_ptr = pblocknr;
err = 0;
found:
*pbh = bh;
out_locked:
unlock_page(page);
page_cache_release(page);
return err;
}
/**
 * nilfs_btnode_delete - delete B-tree node buffer
 * @bh: buffer to be deleted
 *
 * nilfs_btnode_delete() forgets the given buffer while holding the lock
 * of its page (after waiting for writeback), then invalidates the page in
 * its mapping unless the page is still dirty or no longer has a mapping.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
	struct page *pg = bh->b_page;
	pgoff_t idx = page_index(pg);
	struct address_space *owner;
	int dirty_left;

	page_cache_get(pg);
	lock_page(pg);
	wait_on_page_writeback(pg);

	nilfs_forget_buffer(bh);
	dirty_left = PageDirty(pg);
	owner = pg->mapping;

	unlock_page(pg);
	page_cache_release(pg);

	if (dirty_left || !owner)
		return;

	invalidate_inode_pages2_range(owner, idx, idx);
}
/**
 * nilfs_btnode_prepare_change_key
 * prepare to move contents of the block for old key to one of new key.
 * the old buffer will not be removed, but might be reused for new buffer.
 * it might return -ENOMEM because of memory allocation errors,
 * and might return -EIO because of disk read errors.
 *
 * @btnc: B-tree node cache
 * @ctxt: change-key context; ->oldkey, ->newkey and ->bh are inputs and
 *        ->newbh is set here
 *
 * Two strategies are used.  When the block size equals the page size, the
 * old page is additionally inserted into the page tree at the new key and
 * the function returns 0 with the page of ->bh still locked and ->newbh
 * NULL.  Otherwise (or when the slot at the new key stays occupied even
 * after invalidation) a new buffer is created for the new key and stored
 * in ->newbh ("copy mode").  Nothing is done when both keys are equal.
 */
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh, *nbh;
struct inode *inode = NILFS_BTNC_I(btnc);
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
int err;
if (oldkey == newkey)
return 0;
obh = ctxt->bh;
ctxt->newbh = NULL;
if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
lock_page(obh->b_page);
/*
* We cannot call radix_tree_preload for the kernels older
* than 2.6.23, because it is not exported for modules.
*/
retry:
err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (err)
goto failed_unlock;
/* BUG_ON(oldkey != obh->b_page->index); */
if (unlikely(oldkey != obh->b_page->index))
NILFS_PAGE_BUG(obh->b_page,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
spin_lock_irq(&btnc->tree_lock);
err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
spin_unlock_irq(&btnc->tree_lock);
/*
* Note: page->index will not change to newkey until
* nilfs_btnode_commit_change_key() will be called.
* To protect the page in intermediate state, the page lock
* is held.
*/
radix_tree_preload_end();
if (!err)
return 0;
else if (err != -EEXIST)
goto failed_unlock;
/* the new slot is occupied: try to evict that page and insert again */
err = invalidate_inode_pages2_range(btnc, newkey, newkey);
if (!err)
goto retry;
/* fallback to copy mode */
unlock_page(obh->b_page);
}
nbh = nilfs_btnode_create_block(btnc, newkey);
if (!nbh)
return -ENOMEM;
BUG_ON(nbh == obh);
ctxt->newbh = nbh;
return 0;
failed_unlock:
unlock_page(obh->b_page);
return err;
}
/**
 * nilfs_btnode_commit_change_key
 * commit the change_key operation prepared by prepare_change_key().
 *
 * @btnc: B-tree node cache
 * @ctxt: change-key context filled by nilfs_btnode_prepare_change_key()
 *
 * With ->newbh NULL the old page is moved in place: its buffer is marked
 * dirty, the old tree slot is removed, the new slot is tagged dirty, the
 * page index and buffer block number are switched to the new key and the
 * page lock taken during prepare is dropped.  Otherwise the old buffer is
 * copied into ->newbh, which is marked dirty and becomes ->bh, and the
 * old buffer is deleted.  Nothing is done when both keys are equal.
 */
void nilfs_btnode_commit_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
struct page *opage;
if (oldkey == newkey)
return;
if (nbh == NULL) { /* blocksize == pagesize */
opage = obh->b_page;
if (unlikely(oldkey != opage->index))
NILFS_PAGE_BUG(opage,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
mark_buffer_dirty(obh);
spin_lock_irq(&btnc->tree_lock);
radix_tree_delete(&btnc->page_tree, oldkey);
radix_tree_tag_set(&btnc->page_tree, newkey,
PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&btnc->tree_lock);
opage->index = obh->b_blocknr = newkey;
unlock_page(opage);
} else {
nilfs_copy_buffer(nbh, obh);
mark_buffer_dirty(nbh);
nbh->b_blocknr = newkey;
/* hand the new buffer back to the caller through the context */
ctxt->bh = nbh;
nilfs_btnode_delete(obh); /* will decrement bh->b_count */
}
}
/**
 * nilfs_btnode_abort_change_key
 * abort the change_key operation prepared by prepare_change_key().
 *
 * @btnc: B-tree node cache
 * @ctxt: change-key context filled by nilfs_btnode_prepare_change_key()
 *
 * With ->newbh NULL (in-place mode) the extra page-tree slot at the new
 * key is removed and the page lock taken during prepare is dropped.
 * Otherwise the buffer created for the new key is discarded.  Nothing is
 * done when both keys are equal.
 */
void nilfs_btnode_abort_change_key(struct address_space *btnc,
				   struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, newkey);
		spin_unlock_irq(&btnc->tree_lock);
		unlock_page(ctxt->bh->b_page);
	} else {
		/*
		 * nilfs_btnode_create_block() left this buffer mapped and
		 * up to date.  A bare brelse() would keep those flags on
		 * the cached buffer, and the next create for the same key
		 * would trip the BUG() there.  nilfs_btnode_delete()
		 * forgets the buffer (dropping our reference) and
		 * invalidates its page.
		 */
		nilfs_btnode_delete(nbh);
	}
}

59
fs/nilfs2/btnode.h Normal file
View file

@ -0,0 +1,59 @@
/*
* btnode.h - NILFS B-tree node cache
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Seiji Kihara <kihara@osrg.net>
* Revised by Ryusuke Konishi <ryusuke@osrg.net>
*/
#ifndef _NILFS_BTNODE_H
#define _NILFS_BTNODE_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/backing-dev.h>
/**
 * struct nilfs_btnode_chkey_ctxt - change key context
 * @oldkey: old key of block's moving content
 * @newkey: new key for block's content
 * @bh: buffer head of old buffer
 * @newbh: buffer head of new buffer
 *
 * @newbh is set by nilfs_btnode_prepare_change_key(); it stays NULL when
 * the existing page is moved in place (block size equal to page size).
 * On commit in copy mode, @bh is replaced by @newbh.
 */
struct nilfs_btnode_chkey_ctxt {
__u64 oldkey;
__u64 newkey;
struct buffer_head *bh;
struct buffer_head *newbh;
};
/* B-tree node cache interface, implemented in btnode.c. */
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int,
struct buffer_head **, sector_t *);
void nilfs_btnode_delete(struct buffer_head *);
/* Three-phase key change: prepare, then exactly one of commit or abort. */
int nilfs_btnode_prepare_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
void nilfs_btnode_commit_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
void nilfs_btnode_abort_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
#endif /* _NILFS_BTNODE_H */

2351
fs/nilfs2/btree.c Normal file

File diff suppressed because it is too large Load diff

77
fs/nilfs2/btree.h Normal file
View file

@ -0,0 +1,77 @@
/*
* btree.h - NILFS B-tree.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_BTREE_H
#define _NILFS_BTREE_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/list.h>
#include <linux/nilfs2_fs.h>
#include "btnode.h"
#include "bmap.h"
/**
 * struct nilfs_btree_path - A path on which B-tree operations are executed
 * @bp_bh: buffer head of node block
 * @bp_sib_bh: buffer head of sibling node block
 * @bp_index: index of child node
 * @bp_oldreq: ptr end request for old ptr
 * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: context for changing the key of a b-tree node block
 * @bp_op: rebalance operation
 */
struct nilfs_btree_path {
struct buffer_head *bp_bh;
struct buffer_head *bp_sib_bh;
int bp_index;
union nilfs_bmap_ptr_req bp_oldreq;
union nilfs_bmap_ptr_req bp_newreq;
struct nilfs_btnode_chkey_ctxt bp_ctxt;
void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *,
int, __u64 *, __u64 *);
};
/* The B-tree root occupies the raw bmap area. */
#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE
/* key/pointer pairs that fit in the root after the node header */
#define NILFS_BTREE_ROOT_NCHILDREN_MAX \
((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) / \
(sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */)))
#define NILFS_BTREE_ROOT_NCHILDREN_MIN 0
/* bytes set aside in every non-root node in addition to the header */
#define NILFS_BTREE_NODE_EXTRA_PAD_SIZE (sizeof(__le64))
/* key/pointer pairs that fit in a node of @nodesize bytes after header and pad */
#define NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) \
(((nodesize) - sizeof(struct nilfs_btree_node) - \
NILFS_BTREE_NODE_EXTRA_PAD_SIZE) / \
(sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */)))
/* minimum child count: (max - 1) / 2 + 1 */
#define NILFS_BTREE_NODE_NCHILDREN_MIN(nodesize) \
((NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) - 1) / 2 + 1)
/* smallest and largest 64-bit key values */
#define NILFS_BTREE_KEY_MIN ((__u64)0)
#define NILFS_BTREE_KEY_MAX (~(__u64)0)
/* slab cache declared here and defined elsewhere; presumably backs struct nilfs_btree_path — confirm at the definition */
extern struct kmem_cache *nilfs_btree_path_cache;
/* B-tree backend entry points used by bmap.c */
int nilfs_btree_init(struct nilfs_bmap *);
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
const __u64 *, const __u64 *, int);
void nilfs_btree_init_gc(struct nilfs_bmap *);
int nilfs_btree_broken_node_block(struct buffer_head *bh);
#endif /* _NILFS_BTREE_H */

981
fs/nilfs2/cpfile.c Normal file
View file

@ -0,0 +1,981 @@
/*
* cpfile.c - NILFS checkpoint file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/errno.h>
#include <linux/nilfs2_fs.h>
#include "mdt.h"
#include "cpfile.h"
/* Returns the per-block entry count recorded in the cpfile's metadata-file info. */
static inline unsigned long
nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile)
{
return NILFS_MDT(cpfile)->mi_entries_per_block;
}
/*
 * Block number, counted from the beginning of the file, that holds
 * checkpoint @cno: (cno + first entry offset - 1) / entries per block.
 */
static unsigned long
nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
{
	__u64 pos = NILFS_MDT(cpfile)->mi_first_entry_offset + cno - 1;

	/* do_div() leaves the quotient in @pos */
	do_div(pos, nilfs_cpfile_checkpoints_per_block(cpfile));
	return (unsigned long)pos;
}
/*
 * Entry index of checkpoint @cno inside its block:
 * (cno + first entry offset - 1) % entries per block.
 */
static unsigned long
nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
{
	__u64 pos = NILFS_MDT(cpfile)->mi_first_entry_offset + cno - 1;

	/* do_div() returns the remainder */
	return do_div(pos, nilfs_cpfile_checkpoints_per_block(cpfile));
}
/*
 * Number of consecutive checkpoint numbers, starting at @curr, that can be
 * handled in the block containing @curr without running past @max
 * (exclusive): the smaller of "entries left in this block" and
 * "numbers left in the range".
 */
static unsigned long
nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
				  __u64 curr,
				  __u64 max)
{
	__u64 left_in_block = nilfs_cpfile_checkpoints_per_block(cpfile) -
			      nilfs_cpfile_get_offset(cpfile, curr);
	__u64 left_in_range = max - curr;

	if (left_in_block < left_in_range)
		return left_in_block;
	return left_in_range;
}
/* Nonzero when checkpoint @cno is stored in block 0 of the checkpoint file. */
static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
					   __u64 cno)
{
	unsigned long blkoff = nilfs_cpfile_get_blkoff(cpfile, cno);

	return !blkoff;
}
/*
 * Add @n to the per-block valid-checkpoint counter, which is kept in the
 * cp_checkpoints_count field of the first entry of the block mapped at
 * @kaddr.  Returns the updated counter value.
 */
static unsigned int
nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile,
					 struct buffer_head *bh,
					 void *kaddr,
					 unsigned int n)
{
	struct nilfs_checkpoint *first = kaddr + bh_offset(bh);
	unsigned int total = n + le32_to_cpu(first->cp_checkpoints_count);

	first->cp_checkpoints_count = cpu_to_le32(total);
	return total;
}
/*
 * Subtract @n from the per-block valid-checkpoint counter kept in the
 * first entry of the block mapped at @kaddr.  Warns if the counter would
 * underflow, but still performs the subtraction.  Returns the updated
 * counter value.
 */
static unsigned int
nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
					 struct buffer_head *bh,
					 void *kaddr,
					 unsigned int n)
{
	struct nilfs_checkpoint *first = kaddr + bh_offset(bh);
	unsigned int before = le32_to_cpu(first->cp_checkpoints_count);
	unsigned int after;

	WARN_ON(before < n);
	after = before - n;
	first->cp_checkpoints_count = cpu_to_le32(after);
	return after;
}
/*
 * The checkpoint-file header sits at the very start of the buffer;
 * @kaddr is the mapped address of the page that contains @bh.
 */
static inline struct nilfs_cpfile_header *
nilfs_cpfile_block_get_header(const struct inode *cpfile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	void *block_start = kaddr + bh_offset(bh);

	return block_start;
}
/*
 * Address of the entry for checkpoint @cno inside the block @bh, whose
 * page is mapped at @kaddr.
 */
static struct nilfs_checkpoint *
nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
				  struct buffer_head *bh,
				  void *kaddr)
{
	unsigned long index = nilfs_cpfile_get_offset(cpfile, cno);
	void *block_start = kaddr + bh_offset(bh);

	return block_start + index * NILFS_MDT(cpfile)->mi_entry_size;
}
/*
 * Block initializer handed to nilfs_mdt_get_block(): flag every entry
 * slot of the block as an invalid checkpoint.
 */
static void nilfs_cpfile_block_init(struct inode *cpfile,
				    struct buffer_head *bh,
				    void *kaddr)
{
	size_t stride = NILFS_MDT(cpfile)->mi_entry_size;
	struct nilfs_checkpoint *slot = kaddr + bh_offset(bh);
	int remaining;

	for (remaining = nilfs_cpfile_checkpoints_per_block(cpfile);
	     remaining > 0; remaining--) {
		nilfs_checkpoint_set_invalid(slot);
		slot = (void *)slot + stride;
	}
}
/*
 * Read block 0 of the checkpoint file (the block holding the header)
 * without creating it.  The buffer head is returned through @bhp.
 */
static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
						struct buffer_head **bhp)
{
	const unsigned long header_blkoff = 0;
	const int no_create = 0;

	return nilfs_mdt_get_block(cpfile, header_blkoff, no_create, NULL,
				   bhp);
}
/*
 * Get the block that holds checkpoint @cno.  @create is passed through to
 * nilfs_mdt_get_block(), together with nilfs_cpfile_block_init() as the
 * block initializer.
 */
static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
						    __u64 cno,
						    int create,
						    struct buffer_head **bhp)
{
	unsigned long blkoff = nilfs_cpfile_get_blkoff(cpfile, cno);

	return nilfs_mdt_get_block(cpfile, blkoff, create,
				   nilfs_cpfile_block_init, bhp);
}
/* Delete the checkpoint-file block that holds checkpoint @cno. */
static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
						       __u64 cno)
{
	unsigned long blkoff = nilfs_cpfile_get_blkoff(cpfile, cno);

	return nilfs_mdt_delete_block(cpfile, blkoff);
}
/**
 * nilfs_cpfile_get_checkpoint - look up, and optionally create, a checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @create: nonzero to create the entry if it does not exist yet
 * @cpp: place to store the pointer to the checkpoint entry (may be NULL)
 * @bhp: place to store the buffer head of the block holding the entry
 *
 * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
 * specified by @cno.  Creation is only accepted when @cno equals the
 * current checkpoint number.  On success the page of the returned buffer
 * stays mapped with kmap(); the caller releases both the mapping and the
 * buffer with nilfs_cpfile_put_checkpoint().
 *
 * Return Value: On success, 0 is returned, and the checkpoint and the
 * buffer head of the buffer on which the checkpoint is located are stored
 * in the places pointed to by @cpp and @bhp, respectively.  On error, one
 * of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 *
 * %-EINVAL - invalid checkpoint.
 */
int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
				__u64 cno,
				int create,
				struct nilfs_checkpoint **cpp,
				struct buffer_head **bhp)
{
	struct buffer_head *hdr_bh, *entry_bh;
	struct nilfs_cpfile_header *hdr;
	struct nilfs_checkpoint *entry;
	void *entry_map, *hdr_map;
	int err;

	/* @cno must lie in [1, current]; only the current one may be created */
	if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile)))
		return -EINVAL;
	if (unlikely(cno < nilfs_mdt_cno(cpfile) && create))
		return -EINVAL;

	down_write(&NILFS_MDT(cpfile)->mi_sem);

	err = nilfs_cpfile_get_header_block(cpfile, &hdr_bh);
	if (err < 0)
		goto unlock;

	err = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create,
						&entry_bh);
	if (err < 0)
		goto put_header;

	/* long-lived mapping: undone by nilfs_cpfile_put_checkpoint() */
	entry_map = kmap(entry_bh->b_page);
	entry = nilfs_cpfile_block_get_checkpoint(cpfile, cno, entry_bh,
						  entry_map);
	if (!nilfs_checkpoint_invalid(entry))
		goto found;

	if (!create) {
		kunmap(entry_bh->b_page);
		brelse(entry_bh);
		err = -ENOENT;
		goto put_header;
	}

	/* a newly-created checkpoint */
	nilfs_checkpoint_clear_invalid(entry);
	/* the per-block valid counter is not maintained for block 0 */
	if (!nilfs_cpfile_is_in_first(cpfile, cno))
		nilfs_cpfile_block_add_valid_checkpoints(cpfile, entry_bh,
							 entry_map, 1);
	mark_buffer_dirty(entry_bh);

	/* account for the new checkpoint in the file header */
	hdr_map = kmap_atomic(hdr_bh->b_page);
	hdr = nilfs_cpfile_block_get_header(cpfile, hdr_bh, hdr_map);
	le64_add_cpu(&hdr->ch_ncheckpoints, 1);
	kunmap_atomic(hdr_map);
	mark_buffer_dirty(hdr_bh);
	nilfs_mdt_mark_dirty(cpfile);

found:
	if (cpp != NULL)
		*cpp = entry;
	*bhp = entry_bh;

put_header:
	brelse(hdr_bh);
unlock:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return err;
}
/**
 * nilfs_cpfile_put_checkpoint - release a checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @bh: buffer head
 *
 * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
 * specified by @cno: it undoes the kmap() of the buffer's page and drops
 * the buffer reference.  @bh must be the buffer head which has been
 * returned by a previous call to nilfs_cpfile_get_checkpoint() with @cno.
 */
void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
				 struct buffer_head *bh)
{
	struct page *mapped_page = bh->b_page;

	kunmap(mapped_page);
	brelse(bh);
}
/**
 * nilfs_cpfile_delete_checkpoints - delete checkpoints
 * @cpfile: inode of checkpoint file
 * @start: start checkpoint number
 * @end: end checkpoint number
 *
 * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in
 * the period from @start to @end, excluding @end itself. The checkpoints
 * which have been already deleted are ignored. Checkpoints flagged as
 * snapshots are left in place and only counted.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - invalid checkpoints.
 *
 * %-EBUSY - at least one snapshot was found in the range.
 */
int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
__u64 start,
__u64 end)
{
struct buffer_head *header_bh, *cp_bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cno;
void *kaddr;
unsigned long tnicps;
int ret, ncps, nicps, nss, count, i;
/* checkpoint number 0 is never valid and the range must not be reversed */
if (unlikely(start == 0 || start > end)) {
printk(KERN_ERR "%s: invalid range of checkpoint numbers: "
"[%llu, %llu)\n", __func__,
(unsigned long long)start, (unsigned long long)end);
return -EINVAL;
}
down_write(&NILFS_MDT(cpfile)->mi_sem);
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
if (ret < 0)
goto out_sem;
tnicps = 0; /* total number of checkpoints invalidated by this call */
nss = 0; /* number of snapshots met in the range */
/* walk the range one block at a time; ncps entries are handled per pass */
for (cno = start; cno < end; cno += ncps) {
/* computed before any "continue" so that the loop always advances */
ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0) {
if (ret != -ENOENT)
break;
/* skip hole */
ret = 0;
continue;
}
kaddr = kmap_atomic(cp_bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(
cpfile, cno, cp_bh, kaddr);
nicps = 0; /* checkpoints invalidated in this block */
for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
if (nilfs_checkpoint_snapshot(cp)) {
nss++;
} else if (!nilfs_checkpoint_invalid(cp)) {
nilfs_checkpoint_set_invalid(cp);
nicps++;
}
}
if (nicps > 0) {
tnicps += nicps;
mark_buffer_dirty(cp_bh);
nilfs_mdt_mark_dirty(cpfile);
/* the per-block valid counter is only maintained outside block 0 */
if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
count =
nilfs_cpfile_block_sub_valid_checkpoints(
cpfile, cp_bh, kaddr, nicps);
if (count == 0) {
/* make hole */
/* unmap and release first: both exits below skip the loop tail */
kunmap_atomic(kaddr);
brelse(cp_bh);
ret =
nilfs_cpfile_delete_checkpoint_block(
cpfile, cno);
if (ret == 0)
continue;
printk(KERN_ERR
"%s: cannot delete block\n",
__func__);
break;
}
}
}
kunmap_atomic(kaddr);
brelse(cp_bh);
}
/* reflect the invalidated checkpoints in the header counter */
if (tnicps > 0) {
kaddr = kmap_atomic(header_bh->b_page);
header = nilfs_cpfile_block_get_header(cpfile, header_bh,
kaddr);
le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
kunmap_atomic(kaddr);
}
brelse(header_bh);
/*
 * NOTE(review): this overwrites ret unconditionally, so an error code
 * from the loop above is replaced by -EBUSY when a snapshot was seen -
 * confirm this is intended.
 */
if (nss > 0)
ret = -EBUSY;
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/*
 * Fill the CPU-endian checkpoint information record @ci from the
 * little-endian on-disk checkpoint entry @cp.
 */
static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
					      struct nilfs_checkpoint *cp,
					      struct nilfs_cpinfo *ci)
{
	/* identity, flags and snapshot-list link */
	ci->ci_cno = le64_to_cpu(cp->cp_cno);
	ci->ci_flags = le32_to_cpu(cp->cp_flags);
	ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);

	/* creation time and counters */
	ci->ci_create = le64_to_cpu(cp->cp_create);
	ci->ci_nblk_inc = le64_to_cpu(cp->cp_nblk_inc);
	ci->ci_inodes_count = le64_to_cpu(cp->cp_inodes_count);
	ci->ci_blocks_count = le64_to_cpu(cp->cp_blocks_count);
}
/*
 * Collect information on valid checkpoints, scanning checkpoint numbers
 * from *@cnop up to, but not including, the current checkpoint number.
 * At most @nci records are written to @buf, each @cisz bytes apart.
 *
 * Returns the number of records stored (possibly 0) or a negative error
 * code; -ENOENT is returned right away when *@cnop is 0.  When at least
 * one record was stored, *@cnop is advanced to the checkpoint number
 * following the last one reported, so the caller can resume from there.
 */
static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
void *buf, unsigned cisz, size_t nci)
{
struct nilfs_checkpoint *cp;
struct nilfs_cpinfo *ci = buf;
struct buffer_head *bh;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
void *kaddr;
int n, ret;
int ncps, i;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_read(&NILFS_MDT(cpfile)->mi_sem);
/* one pass per block; n counts the records stored so far */
for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
/* computed before the hole check so that "continue" still advances cno */
ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
if (ret < 0) {
if (ret != -ENOENT)
goto out;
continue; /* skip hole */
}
kaddr = kmap_atomic(bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
/* copy out the valid entries of this block until the buffer is full */
for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
if (!nilfs_checkpoint_invalid(cp)) {
nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp,
ci);
ci = (void *)ci + cisz;
n++;
}
}
kunmap_atomic(kaddr);
brelse(bh);
}
ret = n;
if (n > 0) {
/* step back to the last record written to derive the resume point */
ci = (void *)ci - cisz;
*cnop = ci->ci_cno + 1;
}
out:
up_read(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/*
 * Collect information on snapshots by following the snapshot list through
 * the ssl_next links.  At most @nci records are written to @buf, each
 * @cisz bytes apart.
 *
 * *@cnop is the cursor: 0 starts from the list head stored in the file
 * header, ~0 means the list was already exhausted, any other value is the
 * checkpoint number to continue from.  On a successful walk *@cnop is
 * updated to the next snapshot to visit, or to ~0 when the walk stopped
 * at the end of the list or at an entry that is not a valid snapshot.
 *
 * Returns the number of records stored (possibly 0) or a negative error
 * code.
 */
static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
void *buf, unsigned cisz, size_t nci)
{
struct buffer_head *bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
struct nilfs_cpinfo *ci = buf;
__u64 curr = *cnop, next;
unsigned long curr_blkoff, next_blkoff;
void *kaddr;
int n = 0, ret;
down_read(&NILFS_MDT(cpfile)->mi_sem);
if (curr == 0) {
/* fresh walk: fetch the first snapshot number from the header */
ret = nilfs_cpfile_get_header_block(cpfile, &bh);
if (ret < 0)
goto out;
kaddr = kmap_atomic(bh->b_page);
header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
kunmap_atomic(kaddr);
brelse(bh);
if (curr == 0) {
/* the snapshot list is empty */
ret = 0;
goto out;
}
} else if (unlikely(curr == ~(__u64)0)) {
/* terminator left by a previous call: nothing more to report */
ret = 0;
goto out;
}
curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh);
if (unlikely(ret < 0)) {
if (ret == -ENOENT)
ret = 0; /* No snapshots (started from a hole block) */
goto out;
}
kaddr = kmap_atomic(bh->b_page);
while (n < nci) {
cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
/* assume the walk ends here; overwritten below if it goes on */
curr = ~(__u64)0; /* Terminator */
if (unlikely(nilfs_checkpoint_invalid(cp) ||
!nilfs_checkpoint_snapshot(cp)))
break;
nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci);
ci = (void *)ci + cisz;
n++;
next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
if (next == 0)
break; /* reach end of the snapshot list */
next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
/* switch buffers only when the next snapshot is in another block */
if (curr_blkoff != next_blkoff) {
kunmap_atomic(kaddr);
brelse(bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
0, &bh);
if (unlikely(ret < 0)) {
/* nothing is mapped or held here; *cnop is left untouched */
WARN_ON(ret == -ENOENT);
goto out;
}
kaddr = kmap_atomic(bh->b_page);
}
curr = next;
curr_blkoff = next_blkoff;
}
kunmap_atomic(kaddr);
brelse(bh);
*cnop = curr;
ret = n;
out:
up_read(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
 * nilfs_cpfile_get_cpinfo - get information on checkpoints or snapshots
 * @cpfile: inode of checkpoint file
 * @cnop: cursor; checkpoint number to start from, updated for resumption
 * @mode: NILFS_CHECKPOINT to list checkpoints, NILFS_SNAPSHOT to list
 *	snapshots
 * @buf: buffer receiving the information records
 * @cisz: distance in bytes between consecutive records in @buf
 * @nci: maximum number of records to store
 *
 * Return Value: the value returned by the mode-specific helper, i.e. the
 * number of records stored in @buf or a negative error code.  %-EINVAL is
 * returned for an unknown @mode.
 */
ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
				void *buf, unsigned cisz, size_t nci)
{
	if (mode == NILFS_CHECKPOINT)
		return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz,
						  nci);
	if (mode == NILFS_SNAPSHOT)
		return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz,
						  nci);
	return -EINVAL;
}
/**
 * nilfs_cpfile_delete_checkpoint - delete a single checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 *
 * Return Value: the result of nilfs_cpfile_delete_checkpoints() for the
 * range [@cno, @cno + 1), or earlier: the negative error code of the
 * lookup, %-ENOENT when the first checkpoint found from @cno on is not
 * @cno itself, or %-EBUSY when @cno is a snapshot.
 */
int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
{
	struct nilfs_cpinfo info;
	__u64 cursor = cno;
	ssize_t found;

	/* fetch the first valid checkpoint at or after @cno */
	found = nilfs_cpfile_do_get_cpinfo(cpfile, &cursor, &info,
					   sizeof(info), 1);
	if (found < 0)
		return found;
	if (found == 0 || info.ci_cno != cno)
		return -ENOENT;
	if (nilfs_cpinfo_snapshot(&info))
		return -EBUSY;

	return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
}
/*
 * Snapshot-list node stored in the block @bh mapped at @kaddr: the node
 * embedded in the file header when @cno is 0, otherwise the node embedded
 * in the entry of checkpoint @cno.
 */
static struct nilfs_snapshot_list *
nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile,
				     __u64 cno,
				     struct buffer_head *bh,
				     void *kaddr)
{
	struct nilfs_checkpoint *entry;

	if (cno == 0) {
		struct nilfs_cpfile_header *hdr;

		hdr = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
		return &hdr->ch_snapshot_list;
	}

	entry = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
	return &entry->cp_snapshot_list;
}
/*
 * Turn checkpoint @cno into a snapshot and link it into the doubly-linked
 * snapshot list whose head node lives in the file header.
 *
 * The insertion point is found by walking backwards from the header's
 * ssl_prev link while the visited number is greater than @cno; the new
 * node is then placed between "prev" and "curr".  A value of 0 for curr
 * or prev stands for the header node itself.
 *
 * Returns 0 on success or if @cno already is a snapshot, -ENOENT if @cno
 * is 0 or the checkpoint is invalid, or the negative error code of a
 * failed block lookup.
 */
static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
struct nilfs_snapshot_list *list;
__u64 curr, prev;
unsigned long curr_blkoff, prev_blkoff;
void *kaddr;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
goto out_sem;
/* check the current state of the target checkpoint */
kaddr = kmap_atomic(cp_bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
kunmap_atomic(kaddr);
goto out_cp;
}
if (nilfs_checkpoint_snapshot(cp)) {
/* already a snapshot: nothing to do */
ret = 0;
kunmap_atomic(kaddr);
goto out_cp;
}
kunmap_atomic(kaddr);
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
if (ret < 0)
goto out_cp;
/* start the backward walk at the header node (curr == 0) */
kaddr = kmap_atomic(header_bh->b_page);
header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
list = &header->ch_snapshot_list;
curr_bh = header_bh;
/* extra reference so curr_bh can be released independently of header_bh */
get_bh(curr_bh);
curr = 0;
curr_blkoff = 0;
prev = le64_to_cpu(list->ssl_prev);
while (prev > cno) {
prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
curr = prev;
/* move to another buffer only when the node is in a different block */
if (curr_blkoff != prev_blkoff) {
kunmap_atomic(kaddr);
brelse(curr_bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
0, &curr_bh);
if (ret < 0)
goto out_header;
kaddr = kmap_atomic(curr_bh->b_page);
}
curr_blkoff = prev_blkoff;
cp = nilfs_cpfile_block_get_checkpoint(
cpfile, curr, curr_bh, kaddr);
list = &cp->cp_snapshot_list;
prev = le64_to_cpu(list->ssl_prev);
}
kunmap_atomic(kaddr);
/* get the buffer of the predecessor node (the header when prev == 0) */
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_curr;
} else {
prev_bh = header_bh;
get_bh(prev_bh);
}
/* successor: its back link now points to the new snapshot */
kaddr = kmap_atomic(curr_bh->b_page);
list = nilfs_cpfile_block_get_snapshot_list(
cpfile, curr, curr_bh, kaddr);
list->ssl_prev = cpu_to_le64(cno);
kunmap_atomic(kaddr);
/* the new snapshot itself: set both links and the snapshot flag */
kaddr = kmap_atomic(cp_bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
nilfs_checkpoint_set_snapshot(cp);
kunmap_atomic(kaddr);
/* predecessor: its forward link now points to the new snapshot */
kaddr = kmap_atomic(prev_bh->b_page);
list = nilfs_cpfile_block_get_snapshot_list(
cpfile, prev, prev_bh, kaddr);
list->ssl_next = cpu_to_le64(cno);
kunmap_atomic(kaddr);
/* one more snapshot in the header counter */
kaddr = kmap_atomic(header_bh->b_page);
header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
le64_add_cpu(&header->ch_nsnapshots, 1);
kunmap_atomic(kaddr);
mark_buffer_dirty(prev_bh);
mark_buffer_dirty(curr_bh);
mark_buffer_dirty(cp_bh);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
out_curr:
brelse(curr_bh);
out_header:
brelse(header_bh);
out_cp:
brelse(cp_bh);
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/*
 * Turn snapshot @cno back into a plain checkpoint: unlink it from the
 * doubly-linked snapshot list, clear its links and snapshot flag, and
 * decrement the snapshot counter in the file header.  A neighbour number
 * of 0 stands for the list node embedded in the header.
 *
 * Returns 0 on success or if @cno is not a snapshot, -ENOENT if @cno is 0
 * or the checkpoint is invalid, or the negative error code of a failed
 * block lookup.
 */
static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *next_bh, *prev_bh, *cp_bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
struct nilfs_snapshot_list *list;
__u64 next, prev;
void *kaddr;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
goto out_sem;
kaddr = kmap_atomic(cp_bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
kunmap_atomic(kaddr);
goto out_cp;
}
if (!nilfs_checkpoint_snapshot(cp)) {
/* not a snapshot: nothing to do */
ret = 0;
kunmap_atomic(kaddr);
goto out_cp;
}
/* remember both neighbours before dropping the mapping */
list = &cp->cp_snapshot_list;
next = le64_to_cpu(list->ssl_next);
prev = le64_to_cpu(list->ssl_prev);
kunmap_atomic(kaddr);
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
if (ret < 0)
goto out_cp;
/* buffer of the successor node (the header when next == 0) */
if (next != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
&next_bh);
if (ret < 0)
goto out_header;
} else {
next_bh = header_bh;
get_bh(next_bh);
}
/* buffer of the predecessor node (the header when prev == 0) */
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_next;
} else {
prev_bh = header_bh;
get_bh(prev_bh);
}
/* successor now points back to the predecessor */
kaddr = kmap_atomic(next_bh->b_page);
list = nilfs_cpfile_block_get_snapshot_list(
cpfile, next, next_bh, kaddr);
list->ssl_prev = cpu_to_le64(prev);
kunmap_atomic(kaddr);
/* predecessor now points forward to the successor */
kaddr = kmap_atomic(prev_bh->b_page);
list = nilfs_cpfile_block_get_snapshot_list(
cpfile, prev, prev_bh, kaddr);
list->ssl_next = cpu_to_le64(next);
kunmap_atomic(kaddr);
/* detach the entry itself and drop its snapshot flag */
kaddr = kmap_atomic(cp_bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
nilfs_checkpoint_clear_snapshot(cp);
kunmap_atomic(kaddr);
/* one snapshot fewer in the header counter */
kaddr = kmap_atomic(header_bh->b_page);
header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
le64_add_cpu(&header->ch_nsnapshots, -1);
kunmap_atomic(kaddr);
mark_buffer_dirty(next_bh);
mark_buffer_dirty(prev_bh);
mark_buffer_dirty(cp_bh);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
out_next:
brelse(next_bh);
out_header:
brelse(header_bh);
out_cp:
brelse(cp_bh);
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
 * nilfs_cpfile_is_snapshot - test whether a checkpoint is a snapshot
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 *
 * Description: nilfs_cpfile_is_snapshot() looks up the entry of checkpoint
 * @cno under the read lock of the checkpoint file and reports its
 * snapshot flag.
 *
 * Return Value: On success, 1 is returned if the checkpoint specified by
 * @cno is a snapshot, or 0 if not.  On error, one of the following negative
 * error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 */
int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
{
	struct nilfs_checkpoint *entry;
	struct buffer_head *entry_bh;
	void *map;
	int res;

	/*
	 * A checkpoint number is rejected when it is zero or not smaller
	 * than the current checkpoint number.
	 */
	if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
		return -ENOENT;

	down_read(&NILFS_MDT(cpfile)->mi_sem);

	res = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &entry_bh);
	if (res >= 0) {
		map = kmap_atomic(entry_bh->b_page);
		entry = nilfs_cpfile_block_get_checkpoint(cpfile, cno,
							  entry_bh, map);
		if (!nilfs_checkpoint_invalid(entry))
			res = nilfs_checkpoint_snapshot(entry);
		else
			res = -ENOENT;
		kunmap_atomic(map);
		brelse(entry_bh);
	}

	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return res;
}
/**
 * nilfs_cpfile_change_cpmode - change checkpoint mode
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @mode: new mode of the checkpoint
 *
 * Description: nilfs_cpfile_change_cpmode() changes the mode of the
 * checkpoint specified by @cno.  The mode @mode is NILFS_CHECKPOINT or
 * NILFS_SNAPSHOT.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 *
 * %-EBUSY - The snapshot to be turned into a checkpoint is mounted.
 *
 * %-EINVAL - Unknown @mode.
 */
int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
{
	if (mode == NILFS_SNAPSHOT)
		return nilfs_cpfile_set_snapshot(cpfile, cno);

	if (mode != NILFS_CHECKPOINT)
		return -EINVAL;

	/*
	 * Current implementation does not have to protect plain read-only
	 * mounts since they are exclusive with a read/write mount and are
	 * protected from the cleaner.
	 */
	if (nilfs_checkpoint_is_mounted(cpfile->i_sb, cno))
		return -EBUSY;

	return nilfs_cpfile_clear_snapshot(cpfile, cno);
}
/**
 * nilfs_cpfile_get_stat - get checkpoint statistics
 * @cpfile: inode of checkpoint file
 * @cpstat: pointer to a structure of checkpoint statistics
 *
 * Description: nilfs_cpfile_get_stat() reports the current checkpoint
 * number together with the checkpoint and snapshot counters stored in the
 * header of the checkpoint file.
 *
 * Return Value: On success, 0 is returned, and checkpoints information is
 * stored in the place pointed to by @cpstat.  On error, one of the
 * following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
{
	struct nilfs_cpfile_header *hdr;
	struct buffer_head *hdr_bh;
	void *map;
	int err;

	down_read(&NILFS_MDT(cpfile)->mi_sem);

	err = nilfs_cpfile_get_header_block(cpfile, &hdr_bh);
	if (err >= 0) {
		map = kmap_atomic(hdr_bh->b_page);
		hdr = nilfs_cpfile_block_get_header(cpfile, hdr_bh, map);
		cpstat->cs_cno = nilfs_mdt_cno(cpfile);
		cpstat->cs_ncps = le64_to_cpu(hdr->ch_ncheckpoints);
		cpstat->cs_nsss = le64_to_cpu(hdr->ch_nsnapshots);
		kunmap_atomic(map);
		brelse(hdr_bh);
	}

	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return err;
}
/**
 * nilfs_cpfile_read - read or get cpfile inode
 * @sb: super block instance
 * @cpsize: size of a checkpoint entry
 * @raw_inode: on-disk cpfile inode
 * @inodep: buffer to store the inode
 *
 * The entry size is validated against the block size of @sb and
 * NILFS_MIN_CHECKPOINT_SIZE.  An inode obtained in the I_NEW state is set
 * up as a metadata file and filled from @raw_inode; an inode that is
 * already set up is returned as is.
 *
 * Return Value: 0 on success with the inode stored in *@inodep, %-EINVAL
 * for a bad @cpsize, %-ENOMEM if no inode could be obtained, or the error
 * code of the failed initialization step.
 */
int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
		      struct nilfs_inode *raw_inode, struct inode **inodep)
{
	struct inode *cpfile;
	int err;

	if (cpsize > sb->s_blocksize) {
		printk(KERN_ERR
		       "NILFS: too large checkpoint size: %zu bytes.\n",
		       cpsize);
		return -EINVAL;
	}
	if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) {
		printk(KERN_ERR
		       "NILFS: too small checkpoint size: %zu bytes.\n",
		       cpsize);
		return -EINVAL;
	}

	cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO);
	if (unlikely(!cpfile))
		return -ENOMEM;

	if (cpfile->i_state & I_NEW) {
		/* fresh inode: set it up before handing it out */
		err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0);
		if (!err) {
			nilfs_mdt_set_entry_size(cpfile, cpsize,
					sizeof(struct nilfs_cpfile_header));
			err = nilfs_read_inode_common(cpfile, raw_inode);
		}
		if (err) {
			iget_failed(cpfile);
			return err;
		}
		unlock_new_inode(cpfile);
	}

	*inodep = cpfile;
	return 0;
}

46
fs/nilfs2/cpfile.h Normal file
View file

@ -0,0 +1,46 @@
/*
* cpfile.h - NILFS checkpoint file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_CPFILE_H
#define _NILFS_CPFILE_H
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
/*
 * Checkpoint entry access: a buffer obtained with
 * nilfs_cpfile_get_checkpoint() is released with
 * nilfs_cpfile_put_checkpoint().
 */
int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
struct nilfs_checkpoint **,
struct buffer_head **);
void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
/* Deletion of a range [start, end) of checkpoints, or of a single one. */
int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
/* Conversion between plain checkpoints and snapshots, and related queries. */
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
int nilfs_cpfile_is_snapshot(struct inode *, __u64);
int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
size_t);
/* Set up (or look up) the in-memory inode of the checkpoint file. */
int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct nilfs_inode *raw_inode, struct inode **inodep);
#endif /* _NILFS_CPFILE_H */

529
fs/nilfs2/dat.c Normal file
View file

@ -0,0 +1,529 @@
/*
* dat.c - NILFS disk address translation.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/string.h>
#include <linux/errno.h>
#include "nilfs.h"
#include "mdt.h"
#include "alloc.h"
#include "dat.h"
/*
 * Bounds of the checkpoint-number range, used below as initial values of
 * the de_start/de_end fields of a DAT entry.
 */
#define NILFS_CNO_MIN ((__u64)1)
#define NILFS_CNO_MAX (~(__u64)0)
/**
 * struct nilfs_dat_info - on-memory private data of DAT file
 * @mi: on-memory private data of metadata file
 * @palloc_cache: persistent object allocator cache of DAT file
 * @shadow: shadow map of DAT file
 *
 * NOTE(review): NILFS_DAT_I() obtains this structure by casting the value
 * of NILFS_MDT(), so @mi presumably has to stay the first member -
 * confirm before reordering.
 */
struct nilfs_dat_info {
struct nilfs_mdt_info mi;
struct nilfs_palloc_cache palloc_cache;
struct nilfs_shadow_map shadow;
};
/*
 * DAT-specific private data of @dat, obtained by reinterpreting the
 * pointer returned by NILFS_MDT().
 */
static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
{
	void *mdt_private = NILFS_MDT(dat);

	return (struct nilfs_dat_info *)mdt_private;
}
/*
 * Get the block that holds DAT entry @req->pr_entry_nr and store its
 * buffer head in @req->pr_entry_bh.  @create is passed through to
 * nilfs_palloc_get_entry_block().
 */
static int nilfs_dat_prepare_entry(struct inode *dat,
				   struct nilfs_palloc_req *req, int create)
{
	struct buffer_head **bhp = &req->pr_entry_bh;

	return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, create,
					    bhp);
}
/*
 * Commit a change to the entry block held by @req: mark the buffer and
 * the DAT file dirty, then drop the buffer reference.
 */
static void nilfs_dat_commit_entry(struct inode *dat,
				   struct nilfs_palloc_req *req)
{
	struct buffer_head *entry_bh = req->pr_entry_bh;

	mark_buffer_dirty(entry_bh);
	nilfs_mdt_mark_dirty(dat);
	brelse(entry_bh);
}
/* Drop the entry block reference held by @req without marking it dirty. */
static void nilfs_dat_abort_entry(struct inode *dat,
				  struct nilfs_palloc_req *req)
{
	struct buffer_head *entry_bh = req->pr_entry_bh;

	brelse(entry_bh);
}
/*
 * Prepare the allocation of a DAT entry: reserve an entry through the
 * persistent object allocator, then get (creating if needed) the block
 * that holds it.  The reservation is aborted if the block cannot be
 * obtained.  Returns 0 on success or a negative error code.
 */
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_palloc_prepare_alloc_entry(dat, req);

	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 1);
	if (err >= 0)
		return err;

	nilfs_palloc_abort_alloc_entry(dat, req);
	return err;
}
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
entry->de_blocknr = cpu_to_le64(0);
kunmap_atomic(kaddr);
nilfs_palloc_commit_alloc_entry(dat, req);
nilfs_dat_commit_entry(dat, req);
}
/*
 * Abort a prepared allocation: release the entry block first, then cancel
 * the reservation made in the persistent object allocator.
 */
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_alloc_entry(dat, req);
}
static void nilfs_dat_commit_free(struct inode *dat,
struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
entry->de_blocknr = cpu_to_le64(0);
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
nilfs_palloc_commit_free_entry(dat, req);
}
/*
 * Get the existing entry block for @req before its start values are set.
 * A missing block (-ENOENT) triggers a warning; the error code is still
 * returned to the caller.
 */
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_dat_prepare_entry(dat, req, 0);

	WARN_ON(err == -ENOENT);
	return err;
}
/*
 * Record the start of an entry's lifetime: store the current checkpoint
 * number in de_start and @blocknr in de_blocknr, then commit the entry
 * block.
 */
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
			    sector_t blocknr)
{
	struct buffer_head *entry_bh = req->pr_entry_bh;
	struct nilfs_dat_entry *de;
	void *map;

	map = kmap_atomic(entry_bh->b_page);
	de = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, entry_bh,
					  map);
	de->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
	de->de_blocknr = cpu_to_le64(blocknr);
	kunmap_atomic(map);

	nilfs_dat_commit_entry(dat, req);
}
/*
 * Prepare to end the lifetime of the DAT entry described by @req.
 *
 * The existing entry block is looked up (a missing block triggers a
 * warning).  If the entry has no block number assigned (de_blocknr == 0),
 * the release of the entry in the persistent object allocator is prepared
 * as well; should that fail, the entry block is released again.
 *
 * Returns 0 on success or a negative error code.
 */
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	sector_t blocknr;
	void *kaddr;
	int ret;

	ret = nilfs_dat_prepare_entry(dat, req, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		return ret;
	}

	/* only the block number is needed to decide whether to free */
	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr);

	if (blocknr == 0) {
		ret = nilfs_palloc_prepare_free_entry(dat, req);
		if (ret < 0) {
			nilfs_dat_abort_entry(dat, req);
			return ret;
		}
	}

	return 0;
}
void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
int dead)
{
struct nilfs_dat_entry *entry;
__u64 start, end;
sector_t blocknr;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
end = start = le64_to_cpu(entry->de_start);
if (!dead) {
end = nilfs_mdt_cno(dat);
WARN_ON(start > end);
}
entry->de_end = cpu_to_le64(end);
blocknr = le64_to_cpu(entry->de_blocknr);
kunmap_atomic(kaddr);
if (blocknr == 0)
nilfs_dat_commit_free(dat, req);
else
nilfs_dat_commit_entry(dat, req);
}
void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
__u64 start;
sector_t blocknr;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
kunmap_atomic(kaddr);
if (start == nilfs_mdt_cno(dat) && blocknr == 0)
nilfs_palloc_abort_free_entry(dat, req);
nilfs_dat_abort_entry(dat, req);
}
/*
 * Prepare the replacement of one DAT entry by another: prepare the end of
 * @oldreq, then the allocation of @newreq.  If the allocation cannot be
 * prepared, the end operation is aborted again.  Returns 0 on success or
 * the error code of the step that failed.
 */
int nilfs_dat_prepare_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq)
{
	int err = nilfs_dat_prepare_end(dat, oldreq);

	if (err)
		return err;

	err = nilfs_dat_prepare_alloc(dat, newreq);
	if (err < 0)
		nilfs_dat_abort_end(dat, oldreq);
	return err;
}
/*
 * Commit a prepared update: end the old entry (@dead is passed through to
 * nilfs_dat_commit_end()), then commit the allocation of the new one.
 */
void nilfs_dat_commit_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq, int dead)
{
	nilfs_dat_commit_end(dat, oldreq, dead);
	nilfs_dat_commit_alloc(dat, newreq);
}
/*
 * Abort a prepared update: undo the end operation on the old entry, then
 * the allocation of the new one.
 */
void nilfs_dat_abort_update(struct inode *dat,
			    struct nilfs_palloc_req *oldreq,
			    struct nilfs_palloc_req *newreq)
{
	nilfs_dat_abort_end(dat, oldreq);
	nilfs_dat_abort_alloc(dat, newreq);
}
/**
 * nilfs_dat_mark_dirty - mark the DAT block of a virtual block number dirty
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 *
 * Description: nilfs_dat_mark_dirty() looks up the existing block that
 * holds the entry of @vblocknr and marks both that buffer and the DAT
 * file dirty.  The entry itself is not modified.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
{
	struct nilfs_palloc_req req;
	int err;

	req.pr_entry_nr = vblocknr;
	err = nilfs_dat_prepare_entry(dat, &req, 0);
	if (err)
		return err;

	nilfs_dat_commit_entry(dat, &req);
	return 0;
}
/**
 * nilfs_dat_freev - free virtual block numbers
 * @dat: DAT file inode
 * @vblocknrs: array of virtual block numbers
 * @nitems: number of virtual block numbers
 *
 * Description: nilfs_dat_freev() frees the virtual block numbers specified
 * by @vblocknrs and @nitems by handing them to nilfs_palloc_freev().
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The virtual block number have not been allocated.
 */
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
	int err = nilfs_palloc_freev(dat, vblocknrs, nitems);

	return err;
}
/**
 * nilfs_dat_move - change a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknr: block number
 *
 * Description: nilfs_dat_move() changes the block number associated with
 * @vblocknr to @blocknr.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - The entry of @vblocknr has no block number assigned.
 */
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
	struct nilfs_dat_entry *de;
	struct buffer_head *bh;
	void *map;
	int err;

	err = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &bh);
	if (err < 0)
		return err;

	/*
	 * The given disk block number (blocknr) is not yet written to
	 * the device at this point.
	 *
	 * To prevent nilfs_dat_translate() from returning the
	 * uncommitted block number, this makes a copy of the entry
	 * buffer and redirects nilfs_dat_translate() to the copy.
	 */
	if (!buffer_nilfs_redirected(bh)) {
		err = nilfs_mdt_freeze_buffer(dat, bh);
		if (err)
			goto out_release;
	}

	map = kmap_atomic(bh->b_page);
	de = nilfs_palloc_block_get_entry(dat, vblocknr, bh, map);
	if (unlikely(de->de_blocknr == cpu_to_le64(0))) {
		/* an entry without a block number cannot be moved */
		printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
		       (unsigned long long)vblocknr,
		       (unsigned long long)le64_to_cpu(de->de_start),
		       (unsigned long long)le64_to_cpu(de->de_end));
		kunmap_atomic(map);
		err = -EINVAL;
		goto out_release;
	}
	WARN_ON(blocknr == 0);
	de->de_blocknr = cpu_to_le64(blocknr);
	kunmap_atomic(map);

	mark_buffer_dirty(bh);
	nilfs_mdt_mark_dirty(dat);
	err = 0;

out_release:
	brelse(bh);
	return err;
}
/**
 * nilfs_dat_translate - translate a virtual block number to a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknrp: place to store the resulting block number
 *
 * Description: nilfs_dat_translate() reads the DAT entry of @vblocknr and
 * reports the block number recorded in it.
 *
 * Return Value: On success, 0 is returned and the block number associated
 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
 * of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - A block number associated with @vblocknr does not exist.
 */
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
	struct buffer_head *bh, *frozen;
	struct nilfs_dat_entry *de;
	sector_t pblk;
	void *map;
	int err;

	err = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &bh);
	if (err < 0)
		return err;

	/* Outside GC, a redirected buffer is read through its frozen copy. */
	if (!nilfs_doing_gc() && buffer_nilfs_redirected(bh)) {
		frozen = nilfs_mdt_get_frozen_buffer(dat, bh);
		if (frozen) {
			WARN_ON(!buffer_uptodate(frozen));
			brelse(bh);
			bh = frozen;
		}
	}

	map = kmap_atomic(bh->b_page);
	de = nilfs_palloc_block_get_entry(dat, vblocknr, bh, map);
	pblk = le64_to_cpu(de->de_blocknr);
	if (pblk != 0)
		*blocknrp = pblk;
	else
		err = -ENOENT;	/* entry holds no block number */
	kunmap_atomic(map);
	brelse(bh);
	return err;
}
/*
 * nilfs_dat_get_vinfo - fill in DAT information for an array of vinfo records
 * @dat: DAT file inode
 * @buf: array of records, each beginning with a struct nilfs_vinfo
 * @visz: distance in bytes between consecutive records in @buf
 * @nvi: number of records in @buf
 *
 * For every record, vi_vblocknr selects the DAT entry whose de_start, de_end
 * and de_blocknr are copied (converted to CPU byte order) into vi_start,
 * vi_end and vi_blocknr.
 *
 * Returns @nvi on success, or the negative error code returned by
 * nilfs_palloc_get_entry_block().
 */
ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
size_t nvi)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
struct nilfs_vinfo *vinfo = buf;
__u64 first, last;
void *kaddr;
unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
int i, j, n, ret;
/* Each outer pass maps one entry block and consumes n records from it. */
for (i = 0; i < nvi; i += n) {
ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr,
0, &entry_bh);
if (ret < 0)
return ret;
kaddr = kmap_atomic(entry_bh->b_page);
/* first and last virtual block numbers covered by this entry block */
first = vinfo->vi_vblocknr;
do_div(first, entries_per_block);
first *= entries_per_block;
last = first + entries_per_block - 1;
/*
 * Handle the run of consecutive records whose virtual block numbers
 * fall inside [first, last], so the mapped block is reused.  The
 * record pointer advances by visz bytes, not sizeof(*vinfo).
 */
for (j = i, n = 0;
j < nvi && vinfo->vi_vblocknr >= first &&
vinfo->vi_vblocknr <= last;
j++, n++, vinfo = (void *)vinfo + visz) {
entry = nilfs_palloc_block_get_entry(
dat, vinfo->vi_vblocknr, entry_bh, kaddr);
vinfo->vi_start = le64_to_cpu(entry->de_start);
vinfo->vi_end = le64_to_cpu(entry->de_end);
vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
}
kunmap_atomic(kaddr);
brelse(entry_bh);
}
return nvi;
}
/**
 * nilfs_dat_read - read or get dat inode
 * @sb: super block instance
 * @entry_size: size of a dat entry
 * @raw_inode: on-disk dat inode
 * @inodep: buffer to store the inode
 *
 * Rejects entry sizes larger than the block size or smaller than
 * NILFS_MIN_DAT_ENTRY_SIZE with -EINVAL.  A DAT inode that is not new
 * (I_NEW clear) is returned as is; a new one is initialized from @raw_inode.
 *
 * Returns 0 and stores the inode in @inodep on success, -ENOMEM if the inode
 * cannot be obtained, or the error of the failing initialization step.
 */
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
		   struct nilfs_inode *raw_inode, struct inode **inodep)
{
	static struct lock_class_key dat_lock_key;
	struct nilfs_dat_info *info;
	struct inode *inode;
	int ret;

	if (entry_size > sb->s_blocksize) {
		printk(KERN_ERR
		       "NILFS: too large DAT entry size: %zu bytes.\n",
		       entry_size);
		return -EINVAL;
	}
	if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
		printk(KERN_ERR
		       "NILFS: too small DAT entry size: %zu bytes.\n",
		       entry_size);
		return -EINVAL;
	}

	inode = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
	if (unlikely(!inode))
		return -ENOMEM;

	/* Only a freshly created inode needs to be set up. */
	if (inode->i_state & I_NEW) {
		ret = nilfs_mdt_init(inode, NILFS_MDT_GFP, sizeof(*info));
		if (ret)
			goto failed;

		ret = nilfs_palloc_init_blockgroup(inode, entry_size);
		if (ret)
			goto failed;

		info = NILFS_DAT_I(inode);
		lockdep_set_class(&info->mi.mi_sem, &dat_lock_key);
		nilfs_palloc_setup_cache(inode, &info->palloc_cache);
		nilfs_mdt_setup_shadow_map(inode, &info->shadow);

		ret = nilfs_read_inode_common(inode, raw_inode);
		if (ret)
			goto failed;

		unlock_new_inode(inode);
	}

	*inodep = inode;
	return 0;

failed:
	iget_failed(inode);
	return ret;
}

59
fs/nilfs2/dat.h Normal file
View file

@ -0,0 +1,59 @@
/*
* dat.h - NILFS disk address translation.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_DAT_H
#define _NILFS_DAT_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
/* Only pointers to this type appear below, so a forward declaration is enough. */
struct nilfs_palloc_req;
/* Look up the disk block number recorded for a virtual block number. */
int nilfs_dat_translate(struct inode *, __u64, sector_t *);
/*
 * Request-based operations.  Each group is split into prepare/commit(/abort)
 * steps that take the DAT inode and a struct nilfs_palloc_req.
 */
int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *);
int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
sector_t);
int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
/* "update" pairs an old request (ended) with a new request (allocated). */
int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *,
struct nilfs_palloc_req *);
void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
struct nilfs_palloc_req *, int);
void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
struct nilfs_palloc_req *);
/* Operations addressed directly by virtual block number. */
int nilfs_dat_mark_dirty(struct inode *, __u64);
int nilfs_dat_freev(struct inode *, __u64 *, size_t);
int nilfs_dat_move(struct inode *, __u64, sector_t);
/* Fill an array of vinfo records (buffer, record stride, record count). */
ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
/* Obtain the DAT inode, initializing it from raw_inode when it is new. */
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_inode *raw_inode, struct inode **inodep);
#endif /* _NILFS_DAT_H */

676
fs/nilfs2/dir.c Normal file
View file

@ -0,0 +1,676 @@
/*
* dir.c - NILFS directory entry operations
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>
*/
/*
* linux/fs/ext2/dir.c
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/fs/minix/dir.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* ext2 directory handling functions
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
*
* All code that works with directory layout had been switched to pagecache
* and moved here. AV
*/
#include <linux/pagemap.h>
#include "nilfs.h"
#include "page.h"
/*
* nilfs uses block-sized chunks. Arguably, sector-sized ones would be
* more robust, but we have what we have
*/
static inline unsigned nilfs_chunk_size(struct inode *inode)
{
return inode->i_sb->s_blocksize;
}
/* Undo nilfs_get_page(): unmap the page, then drop the page reference. */
static inline void nilfs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
/* Number of page cache pages needed to hold i_size bytes (rounded up). */
static inline unsigned long dir_pages(struct inode *inode)
{
	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
}
/*
 * Return the offset into page `page_nr' of the last valid
 * byte in that page, plus one.
 *
 * The remaining size is computed in 64 bits.  Storing i_size in a 32-bit
 * variable dropped its upper bits, so a directory whose size is 4 GiB or
 * more could yield a value smaller than a record; callers that subtract a
 * record length from the result would then wrap around and scan past the
 * page.  The result is clamped to PAGE_CACHE_SIZE, so it always fits in the
 * unsigned return type.
 */
static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
{
	unsigned long long last_byte = inode->i_size;

	last_byte -= (unsigned long long)page_nr << PAGE_CACHE_SHIFT;
	if (last_byte > PAGE_CACHE_SIZE)
		last_byte = PAGE_CACHE_SIZE;
	return last_byte;
}
/*
 * Prepare bytes [from, to) of @page for writing through
 * __block_write_begin() with nilfs_get_block as the block mapper.
 * Returns the value of __block_write_begin().
 */
static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
{
	return __block_write_begin(page, page_offset(page) + from, to - from,
				   nilfs_get_block);
}
/*
 * Finish a write to bytes [from, to) of @page that was started with
 * nilfs_prepare_chunk().  Grows i_size if the write went past it, requests
 * a synchronous transaction for DIRSYNC directories, marks the directory
 * dirty and unlocks the page.
 */
static void nilfs_commit_chunk(struct page *page,
			       struct address_space *mapping,
			       unsigned from, unsigned to)
{
	struct inode *host = mapping->host;
	loff_t start = page_offset(page) + from;
	unsigned nbytes = to - from;
	unsigned clean, written;
	loff_t end;
	int err;

	/* Count the clean buffers before block_write_end() dirties them. */
	clean = nilfs_page_count_clean_buffers(page, from, to);
	written = block_write_end(NULL, mapping, start, nbytes, nbytes,
				  page, NULL);
	end = start + written;
	if (end > host->i_size)
		i_size_write(host, end);
	if (IS_DIRSYNC(host))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	err = nilfs_set_file_dirty(host, clean);
	WARN_ON(err); /* do not happen */
	unlock_page(page);
}
/*
 * nilfs_check_page - validate the directory entries stored in a page
 * @page: mapped directory page
 *
 * Walks every record in the page and checks that its rec_len is at least
 * the minimum, 4-byte aligned, large enough for its name, and does not
 * cross a chunk boundary, and that the records end exactly at the limit.
 *
 * The page is always marked Checked; on any inconsistency the problem is
 * reported through nilfs_error() and the page is also marked Error.
 */
static void nilfs_check_page(struct page *page)
{
	struct inode *dir = page->mapping->host;
	struct super_block *sb = dir->i_sb;
	unsigned chunk_size = nilfs_chunk_size(dir);
	char *kaddr = page_address(page);
	unsigned offs, rec_len;
	unsigned limit = PAGE_CACHE_SIZE;
	struct nilfs_dir_entry *p;
	char *error;

	/* The page containing i_size is only valid up to i_size. */
	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
		limit = dir->i_size & ~PAGE_CACHE_MASK;
		if (limit & (chunk_size - 1))
			goto Ebadsize;
		if (!limit)
			goto out;
	}
	for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) {
		p = (struct nilfs_dir_entry *)(kaddr + offs);
		rec_len = nilfs_rec_len_from_disk(p->rec_len);

		if (rec_len < NILFS_DIR_REC_LEN(1))
			goto Eshort;
		if (rec_len & 3)
			goto Ealign;
		if (rec_len < NILFS_DIR_REC_LEN(p->name_len))
			goto Enamelen;
		/* first and last byte of the record must share a chunk */
		if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
			goto Espan;
	}
	if (offs != limit)
		goto Eend;
out:
	SetPageChecked(page);
	return;

	/* Too bad, we had an error */

Ebadsize:
	nilfs_error(sb, "nilfs_check_page",
		    "size of directory #%lu is not a multiple of chunk size",
		    dir->i_ino
	);
	goto fail;
Eshort:
	error = "rec_len is smaller than minimal";
	goto bad_entry;
Ealign:
	error = "unaligned directory entry";
	goto bad_entry;
Enamelen:
	error = "rec_len is too small for name_len";
	goto bad_entry;
Espan:
	error = "directory entry across blocks";
bad_entry:
	/* rec_len is unsigned, so it is printed with %u */
	nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - "
		    "offset=%lu, inode=%lu, rec_len=%u, name_len=%d",
		    dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
		    (unsigned long) le64_to_cpu(p->inode),
		    rec_len, p->name_len);
	goto fail;
Eend:
	p = (struct nilfs_dir_entry *)(kaddr + offs);
	/*
	 * The two literals are concatenated, so a separator is needed
	 * between "boundary" and "offset=".
	 */
	nilfs_error(sb, "nilfs_check_page",
		    "entry in directory #%lu spans the page boundary - "
		    "offset=%lu, inode=%lu",
		    dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
		    (unsigned long) le64_to_cpu(p->inode));
fail:
	SetPageChecked(page);
	SetPageError(page);
}
/*
 * Read page @n of directory @dir, map it with kmap() and make sure it has
 * been validated by nilfs_check_page().
 *
 * Returns the mapped page, the ERR_PTR from read_mapping_page(), or
 * ERR_PTR(-EIO) if the page is flagged with an error (the page is released
 * in that case).
 */
static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
{
	struct page *page = read_mapping_page(dir->i_mapping, n, NULL);

	if (IS_ERR(page))
		return page;

	kmap(page);
	if (!PageChecked(page))
		nilfs_check_page(page);
	if (PageError(page)) {
		nilfs_put_page(page);
		return ERR_PTR(-EIO);
	}
	return page;
}
/*
 * NOTE! unlike strncmp, nilfs_match returns 1 for success, 0 for failure.
 *
 * An entry matches when its name length equals @len, it is in use
 * (non-zero inode) and the name bytes are identical.
 *
 * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller.
 */
static int
nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
{
	return len == de->name_len && de->inode &&
		!memcmp(name, de->name, len);
}
/*
 * Step to the record that follows @p, i.e. rec_len bytes further on.
 *
 * p is at least 6 bytes before the end of page
 */
static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
{
	char *next = (char *)p + nilfs_rec_len_from_disk(p->rec_len);

	return (struct nilfs_dir_entry *)next;
}
/*
 * Maps an on-disk NILFS_FT_* file type code to the DT_* value that
 * nilfs_readdir() passes to dir_emit().  Callers must check that the index
 * is below NILFS_FT_MAX before using the table.
 */
static unsigned char
nilfs_filetype_table[NILFS_FT_MAX] = {
[NILFS_FT_UNKNOWN] = DT_UNKNOWN,
[NILFS_FT_REG_FILE] = DT_REG,
[NILFS_FT_DIR] = DT_DIR,
[NILFS_FT_CHRDEV] = DT_CHR,
[NILFS_FT_BLKDEV] = DT_BLK,
[NILFS_FT_FIFO] = DT_FIFO,
[NILFS_FT_SOCK] = DT_SOCK,
[NILFS_FT_SYMLINK] = DT_LNK,
};
/* Shift that moves the S_IFMT file type bits of a mode down to a table index. */
#define S_SHIFT 12
/*
 * Maps (mode & S_IFMT) >> S_SHIFT to the NILFS_FT_* code stored in a
 * directory entry; used by nilfs_set_de_type().  Slots without an explicit
 * initializer are implicitly zero.
 */
static unsigned char
nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
[S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV,
[S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO,
[S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK,
[S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK,
};
/* Record in @de the file type code that corresponds to @inode's mode. */
static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
{
	de->file_type = nilfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
}
/*
 * nilfs_readdir - emit directory entries starting at ctx->pos
 * @file: open directory
 * @ctx: iteration context; ctx->pos is advanced past every record visited
 *
 * Returns 0 when the end of the directory is reached or dir_emit() declines
 * an entry, and -EIO when a page cannot be read or a record with a zero
 * rec_len is found.
 */
static int nilfs_readdir(struct file *file, struct dir_context *ctx)
{
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
/* split the position into a page index (n) and an offset in that page */
unsigned int offset = pos & ~PAGE_CACHE_MASK;
unsigned long n = pos >> PAGE_CACHE_SHIFT;
unsigned long npages = dir_pages(inode);
/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
/* no room left for even a minimal record: nothing more to report */
if (pos > inode->i_size - NILFS_DIR_REC_LEN(1))
return 0;
/* only the first page starts at a non-zero offset */
for ( ; n < npages; n++, offset = 0) {
char *kaddr, *limit;
struct nilfs_dir_entry *de;
struct page *page = nilfs_get_page(inode, n);
if (IS_ERR(page)) {
nilfs_error(sb, __func__, "bad page in #%lu",
inode->i_ino);
/* move the position to the start of the next page before failing */
ctx->pos += PAGE_CACHE_SIZE - offset;
return -EIO;
}
kaddr = page_address(page);
de = (struct nilfs_dir_entry *)(kaddr + offset);
/* last address at which a minimal record can still start */
limit = kaddr + nilfs_last_byte(inode, n) -
NILFS_DIR_REC_LEN(1);
for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
/* a zero rec_len would never advance: report it and stop */
if (de->rec_len == 0) {
nilfs_error(sb, __func__,
"zero-length directory entry");
nilfs_put_page(page);
return -EIO;
}
/* entries with inode == 0 are unused and are not emitted */
if (de->inode) {
unsigned char t;
if (de->file_type < NILFS_FT_MAX)
t = nilfs_filetype_table[de->file_type];
else
t = DT_UNKNOWN;
if (!dir_emit(ctx, de->name, de->name_len,
le64_to_cpu(de->inode), t)) {
nilfs_put_page(page);
return 0;
}
}
/* the position advances for unused entries too */
ctx->pos += nilfs_rec_len_from_disk(de->rec_len);
}
nilfs_put_page(page);
}
return 0;
}
/*
 * nilfs_find_entry()
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the page in which the entry was found (through the res_page
 * parameter) and the entry itself as the return value. Page is returned
 * mapped and unlocked. Entry is guaranteed to be valid.
 *
 * Returns NULL, with *res_page set to NULL, when the name is not found, the
 * directory has no pages, a zero-length record is met, or the directory
 * size exceeds its block count. Pages that cannot be read are skipped.
 *
 * The search starts at the page where the previous lookup succeeded
 * (i_dir_start_lookup) and wraps around.
 */
struct nilfs_dir_entry *
nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
		 struct page **res_page)
{
	const unsigned char *name = qstr->name;
	int namelen = qstr->len;
	unsigned reclen = NILFS_DIR_REC_LEN(namelen);
	unsigned long start, n;
	unsigned long npages = dir_pages(dir);
	struct page *page = NULL;
	struct nilfs_inode_info *ei = NILFS_I(dir);
	struct nilfs_dir_entry *de;

	/*
	 * Clear the output before any early exit so that the caller never
	 * sees an uninitialized page pointer, even for an empty directory.
	 */
	*res_page = NULL;

	if (npages == 0)
		goto out;

	/* OFFSET_CACHE */
	start = ei->i_dir_start_lookup;
	if (start >= npages)
		start = 0;
	n = start;
	do {
		char *kaddr;

		page = nilfs_get_page(dir, n);
		if (!IS_ERR(page)) {
			kaddr = page_address(page);
			de = (struct nilfs_dir_entry *)kaddr;
			/* last address where a record of reclen can start */
			kaddr += nilfs_last_byte(dir, n) - reclen;
			while ((char *) de <= kaddr) {
				if (de->rec_len == 0) {
					nilfs_error(dir->i_sb, __func__,
						"zero-length directory entry");
					nilfs_put_page(page);
					goto out;
				}
				if (nilfs_match(namelen, name, de))
					goto found;
				de = nilfs_next_entry(de);
			}
			nilfs_put_page(page);
		}
		if (++n >= npages)
			n = 0;
		/* next page is past the blocks we've got */
		if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
			nilfs_error(dir->i_sb, __func__,
			       "dir %lu size %lld exceeds block count %llu",
			       dir->i_ino, dir->i_size,
			       (unsigned long long)dir->i_blocks);
			goto out;
		}
	} while (n != start);
out:
	return NULL;

found:
	*res_page = page;
	ei->i_dir_start_lookup = n;
	return de;
}
/*
 * Return the second record of the first directory page and store that page
 * in *p.  Returns NULL, leaving *p untouched, if the page cannot be
 * obtained.  On success the page stays mapped and referenced.
 */
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
{
	struct page *page = nilfs_get_page(dir, 0);
	struct nilfs_dir_entry *first;

	if (IS_ERR(page))
		return NULL;

	first = (struct nilfs_dir_entry *)page_address(page);
	*p = page;
	return nilfs_next_entry(first);
}
/*
 * Look up @qstr in @dir and return the inode number stored in the matching
 * entry, or 0 if nilfs_find_entry() finds nothing.
 */
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
{
	struct nilfs_dir_entry *de;
	struct page *page;
	ino_t ino;

	de = nilfs_find_entry(dir, qstr, &page);
	if (!de)
		return 0;

	ino = le64_to_cpu(de->inode);
	/* drop the mapping and reference taken by nilfs_find_entry() */
	kunmap(page);
	page_cache_release(page);
	return ino;
}
/*
 * Point the existing entry @de (located in @page of @dir) at @inode and
 * update its file type, then refresh the directory's mtime/ctime.
 *
 * Releases the page.
 */
void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
		    struct page *page, struct inode *inode)
{
	struct address_space *mapping = page->mapping;
	unsigned start = (char *)de - (char *)page_address(page);
	unsigned end = start + nilfs_rec_len_from_disk(de->rec_len);
	int err;

	lock_page(page);
	err = nilfs_prepare_chunk(page, start, end);
	BUG_ON(err);

	de->inode = cpu_to_le64(inode->i_ino);
	nilfs_set_de_type(de, inode);

	/* nilfs_commit_chunk() unlocks the page */
	nilfs_commit_chunk(page, mapping, start, end);
	nilfs_put_page(page);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
}
/*
 * nilfs_add_link - add an entry named after @dentry that refers to @inode
 *
 * Searches the parent directory of @dentry for a record with enough room,
 * extending the directory by one chunk if none is found.
 *
 * Returns 0 on success, -EEXIST if the name is already present, -EIO on a
 * zero-length record, or the error from nilfs_get_page() or
 * nilfs_prepare_chunk().
 *
 * Parent is locked.
 */
int nilfs_add_link(struct dentry *dentry, struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
const unsigned char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
unsigned chunk_size = nilfs_chunk_size(dir);
/* record size needed for the new name */
unsigned reclen = NILFS_DIR_REC_LEN(namelen);
unsigned short rec_len, name_len;
struct page *page = NULL;
struct nilfs_dir_entry *de;
unsigned long npages = dir_pages(dir);
unsigned long n;
char *kaddr;
unsigned from, to;
int err;
/*
* We take care of directory expansion in the same loop.
* This code plays outside i_size, so it locks the page
* to protect that region.
*/
/* n == npages is the page just past the current end of the directory */
for (n = 0; n <= npages; n++) {
char *dir_end;
page = nilfs_get_page(dir, n);
err = PTR_ERR(page);
if (IS_ERR(page))
goto out;
lock_page(page);
kaddr = page_address(page);
dir_end = kaddr + nilfs_last_byte(dir, n);
de = (struct nilfs_dir_entry *)kaddr;
/* last address in the page where a record of reclen can start */
kaddr += PAGE_CACHE_SIZE - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
/* start a new chunk holding one unused record */
name_len = 0;
rec_len = chunk_size;
de->rec_len = nilfs_rec_len_to_disk(chunk_size);
de->inode = 0;
goto got_it;
}
if (de->rec_len == 0) {
nilfs_error(dir->i_sb, __func__,
"zero-length directory entry");
err = -EIO;
goto out_unlock;
}
err = -EEXIST;
if (nilfs_match(namelen, name, de))
goto out_unlock;
/* NOTE: name_len holds the record size this entry needs, not the raw name length */
name_len = NILFS_DIR_REC_LEN(de->name_len);
rec_len = nilfs_rec_len_from_disk(de->rec_len);
/* an unused record that is big enough can be taken over whole */
if (!de->inode && rec_len >= reclen)
goto got_it;
/* a used record with enough slack after its own data can be split */
if (rec_len >= name_len + reclen)
goto got_it;
de = (struct nilfs_dir_entry *)((char *)de + rec_len);
}
unlock_page(page);
nilfs_put_page(page);
}
/* the loop always leaves through got_it or an error label */
BUG();
return -EINVAL;
got_it:
from = (char *)de - (char *)page_address(page);
to = from + rec_len;
err = nilfs_prepare_chunk(page, from, to);
if (err)
goto out_unlock;
if (de->inode) {
/* split: the old entry keeps name_len bytes, the new one gets the rest */
struct nilfs_dir_entry *de1;
de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len);
de->rec_len = nilfs_rec_len_to_disk(name_len);
de = de1;
}
de->name_len = namelen;
memcpy(de->name, name, namelen);
de->inode = cpu_to_le64(inode->i_ino);
nilfs_set_de_type(de, inode);
/* nilfs_commit_chunk() unlocks the page */
nilfs_commit_chunk(page, page->mapping, from, to);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
nilfs_mark_inode_dirty(dir);
/* OFFSET_CACHE */
/* on the success path err is still 0 from nilfs_prepare_chunk() */
out_put:
nilfs_put_page(page);
out:
return err;
out_unlock:
unlock_page(page);
goto out_put;
}
/*
 * nilfs_delete_entry deletes a directory entry by merging it with the
 * previous entry. Page is up-to-date. Releases the page.
 *
 * @dir is the entry to delete (not a directory inode) and @page is the
 * mapped page that contains it. Returns 0 on success, or -EIO if a
 * zero-length record is found while looking for the previous entry.
 */
int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
char *kaddr = page_address(page);
/* from: start of the chunk containing the entry; to: end of the entry */
unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
unsigned to = ((char *)dir - kaddr) +
nilfs_rec_len_from_disk(dir->rec_len);
struct nilfs_dir_entry *pde = NULL;
struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
int err;
/* walk the chunk to find the record just before the one being deleted */
while ((char *)de < (char *)dir) {
if (de->rec_len == 0) {
nilfs_error(inode->i_sb, __func__,
"zero-length directory entry");
err = -EIO;
goto out;
}
pde = de;
de = nilfs_next_entry(de);
}
/* with a predecessor, only the range from it to the end of the entry is rewritten */
if (pde)
from = (char *)pde - (char *)page_address(page);
lock_page(page);
err = nilfs_prepare_chunk(page, from, to);
BUG_ON(err);
/* grow the predecessor so that it also covers the deleted record */
if (pde)
pde->rec_len = nilfs_rec_len_to_disk(to - from);
/* inode 0 marks the record as unused */
dir->inode = 0;
/* nilfs_commit_chunk() unlocks the page */
nilfs_commit_chunk(page, mapping, from, to);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
out:
nilfs_put_page(page);
return err;
}
/*
 * Set the first fragment of directory.
 *
 * Writes the first chunk of a new directory: a "." record that refers to
 * @inode followed by a ".." record that refers to @parent and fills the
 * rest of the chunk.
 *
 * Returns 0 on success, -ENOMEM if the page cannot be grabbed, or the error
 * from nilfs_prepare_chunk().
 */
int nilfs_make_empty(struct inode *inode, struct inode *parent)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page = grab_cache_page(mapping, 0);
	unsigned chunk_size = nilfs_chunk_size(inode);
	struct nilfs_dir_entry *dot, *dotdot;
	void *base;
	int err;

	if (!page)
		return -ENOMEM;

	err = nilfs_prepare_chunk(page, 0, chunk_size);
	if (unlikely(err)) {
		unlock_page(page);
		page_cache_release(page);
		return err;
	}

	base = kmap_atomic(page);
	memset(base, 0, chunk_size);

	/* "." occupies a minimal record at the start of the chunk */
	dot = (struct nilfs_dir_entry *)base;
	dot->name_len = 1;
	dot->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
	memcpy(dot->name, ".\0\0", 4);
	dot->inode = cpu_to_le64(inode->i_ino);
	nilfs_set_de_type(dot, inode);

	/* ".." takes everything that is left in the chunk */
	dotdot = (struct nilfs_dir_entry *)((char *)base + NILFS_DIR_REC_LEN(1));
	dotdot->name_len = 2;
	dotdot->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
	dotdot->inode = cpu_to_le64(parent->i_ino);
	memcpy(dotdot->name, "..\0", 4);
	nilfs_set_de_type(dotdot, inode);

	kunmap_atomic(base);
	/* nilfs_commit_chunk() unlocks the page */
	nilfs_commit_chunk(page, mapping, 0, chunk_size);
	page_cache_release(page);
	return err;
}
/*
 * routine to check that the specified directory is empty (for rmdir)
 *
 * Returns 1 if the directory contains nothing but "." and "..", and 0
 * otherwise. A page that cannot be read, or a zero-length record, makes the
 * directory count as not empty: its contents cannot be verified, so it must
 * not be reported as safe to remove.
 */
int nilfs_empty_dir(struct inode *inode)
{
	struct page *page = NULL;
	unsigned long i, npages = dir_pages(inode);

	for (i = 0; i < npages; i++) {
		char *kaddr;
		struct nilfs_dir_entry *de;

		page = nilfs_get_page(inode, i);
		/*
		 * Do not skip unreadable pages: they may hold live entries,
		 * and skipping them would let rmdir succeed on a directory
		 * that is not actually empty.
		 */
		if (IS_ERR(page))
			return 0;

		kaddr = page_address(page);
		de = (struct nilfs_dir_entry *)kaddr;
		/* last address at which a minimal record can still start */
		kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);

		while ((char *)de <= kaddr) {
			if (de->rec_len == 0) {
				nilfs_error(inode->i_sb, __func__,
					    "zero-length directory entry "
					    "(kaddr=%p, de=%p)\n", kaddr, de);
				goto not_empty;
			}
			if (de->inode != 0) {
				/* check for . and .. */
				if (de->name[0] != '.')
					goto not_empty;
				if (de->name_len > 2)
					goto not_empty;
				if (de->name_len < 2) {
					/* "." must refer to the directory itself */
					if (de->inode !=
					    cpu_to_le64(inode->i_ino))
						goto not_empty;
				} else if (de->name[1] != '.')
					goto not_empty;
			}
			de = nilfs_next_entry(de);
		}
		nilfs_put_page(page);
	}
	return 1;

not_empty:
	nilfs_put_page(page);
	return 0;
}
/*
 * File operations for nilfs directories.  Iteration is implemented by
 * nilfs_readdir(); seeking and read() use the generic helpers.
 */
const struct file_operations nilfs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate = nilfs_readdir,
.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nilfs_compat_ioctl,
#endif /* CONFIG_COMPAT */
.fsync = nilfs_sync_file,
};

369
fs/nilfs2/direct.c Normal file
View file

@ -0,0 +1,369 @@
/*
* direct.c - NILFS direct block pointer.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#include <linux/errno.h>
#include "nilfs.h"
#include "page.h"
#include "direct.h"
#include "alloc.h"
#include "dat.h"
/*
 * Return the pointer array of a direct mapping: it starts right after the
 * struct nilfs_direct_node placed at the beginning of b_u.u_data.
 */
static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct)
{
	struct nilfs_direct_node *node =
		(struct nilfs_direct_node *)direct->b_u.u_data;

	return (__le64 *)(node + 1);
}
/* Read slot @key of the pointer array in CPU byte order (no range check). */
static inline __u64
nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key)
{
	return le64_to_cpu(nilfs_direct_dptrs(direct)[key]);
}
/* Store @ptr in slot @key of the pointer array, little-endian (no range check). */
static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct,
					__u64 key, __u64 ptr)
{
	nilfs_direct_dptrs(direct)[key] = cpu_to_le64(ptr);
}
/*
 * Look up the pointer stored for @key.  Only level 1 exists in a direct
 * mapping.  Returns 0 and stores the pointer in *ptrp, or -ENOENT when the
 * key or level is out of range or the slot is empty.
 */
static int nilfs_direct_lookup(const struct nilfs_bmap *direct,
			       __u64 key, int level, __u64 *ptrp)
{
	__u64 found;

	if (level != 1 || key > NILFS_DIRECT_KEY_MAX)
		return -ENOENT;

	found = nilfs_direct_get_ptr(direct, key);
	if (found == NILFS_BMAP_INVALID_PTR)
		return -ENOENT;

	*ptrp = found;
	return 0;
}
/*
 * nilfs_direct_lookup_contig - look up a run of contiguous blocks
 * @direct: direct mapping
 * @key: first key of the run
 * @ptrp: place to store the block number found for @key
 * @maxblocks: upper limit on the length of the run
 *
 * When the mapping uses virtual block numbers, each pointer is translated
 * through the DAT before it is compared.
 *
 * Returns the number of contiguous blocks (at least 1) and stores the first
 * block number in *ptrp.  Returns -ENOENT if @key is out of range or has no
 * pointer.  A DAT translation failure of -ENOENT is reported as -EINVAL: the
 * mapping holds a pointer the DAT does not know, which is metadata
 * corruption and must not be mistaken for a hole by the caller.  Other
 * translation errors are returned unchanged.
 */
static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
				      __u64 key, __u64 *ptrp,
				      unsigned maxblocks)
{
	struct inode *dat = NULL;
	__u64 ptr, ptr2;
	sector_t blocknr;
	int ret, cnt;

	if (key > NILFS_DIRECT_KEY_MAX)
		return -ENOENT;
	ptr = nilfs_direct_get_ptr(direct, key);
	if (ptr == NILFS_BMAP_INVALID_PTR)
		return -ENOENT;

	if (NILFS_BMAP_USE_VBN(direct)) {
		dat = nilfs_bmap_get_dat(direct);
		ret = nilfs_dat_translate(dat, ptr, &blocknr);
		if (ret < 0)
			goto dat_error;
		ptr = blocknr;
	}

	/* never look beyond the last slot of the pointer array */
	maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1);
	for (cnt = 1; cnt < maxblocks &&
		     (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) !=
		     NILFS_BMAP_INVALID_PTR;
	     cnt++) {
		if (dat) {
			ret = nilfs_dat_translate(dat, ptr2, &blocknr);
			if (ret < 0)
				goto dat_error;
			ptr2 = blocknr;
		}
		if (ptr2 != ptr + cnt)
			break;
	}
	*ptrp = ptr;
	return cnt;

dat_error:
	if (ret == -ENOENT)
		ret = -EINVAL;	/* notify bmap layer of metadata corruption */
	return ret;
}
/*
 * Choose the target virtual block number for a new block at @key: the
 * sequential-access candidate if there is one, otherwise the block group
 * candidate.
 */
static __u64
nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key)
{
	__u64 target = nilfs_bmap_find_target_seq(direct, key);

	if (target == NILFS_BMAP_INVALID_PTR)
		target = nilfs_bmap_find_target_in_group(direct);
	return target;
}
/*
 * nilfs_direct_insert - insert a new block at @key
 * @bmap: direct mapping
 * @key: key to insert
 * @ptr: address of the block's buffer head, passed as an integer
 *
 * Allocates a pointer for @key, stores it in the mapping, marks the buffer
 * volatile and the bmap dirty, and adds one block to the inode.
 *
 * Returns 0 on success, -ENOENT if @key is out of range, -EEXIST if the
 * slot is already in use, or the error from nilfs_bmap_prepare_alloc_ptr().
 */
static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
{
	union nilfs_bmap_ptr_req req;
	struct inode *dat = NULL;
	struct buffer_head *bh;
	int err;

	if (key > NILFS_DIRECT_KEY_MAX)
		return -ENOENT;
	if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR)
		return -EEXIST;

	if (NILFS_BMAP_USE_VBN(bmap)) {
		req.bpr_ptr = nilfs_direct_find_target_v(bmap, key);
		dat = nilfs_bmap_get_dat(bmap);
	}

	err = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
	if (err)
		return err;

	/* ptr must be a pointer to a buffer head. */
	bh = (struct buffer_head *)((unsigned long)ptr);
	set_buffer_nilfs_volatile(bh);

	nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
	nilfs_direct_set_ptr(bmap, key, req.bpr_ptr);

	if (!nilfs_bmap_dirty(bmap))
		nilfs_bmap_set_dirty(bmap);

	if (NILFS_BMAP_USE_VBN(bmap))
		nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);

	nilfs_inode_add_blocks(bmap->b_inode, 1);
	return 0;
}
/*
 * Remove the block stored at @key: end its pointer, clear the slot and
 * subtract one block from the inode.
 *
 * Returns 0 on success, -ENOENT if @key is out of range or the slot is
 * empty, or the error from nilfs_bmap_prepare_end_ptr().
 */
static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
{
	union nilfs_bmap_ptr_req req;
	struct inode *dat = NULL;
	int err;

	if (key > NILFS_DIRECT_KEY_MAX)
		return -ENOENT;
	if (nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR)
		return -ENOENT;

	if (NILFS_BMAP_USE_VBN(bmap))
		dat = nilfs_bmap_get_dat(bmap);

	req.bpr_ptr = nilfs_direct_get_ptr(bmap, key);

	err = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
	if (err)
		return err;

	nilfs_bmap_commit_end_ptr(bmap, &req, dat);
	nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
	nilfs_inode_sub_blocks(bmap->b_inode, 1);
	return 0;
}
/*
 * Find the largest key that has a valid pointer.  Returns 0 and stores the
 * key in *keyp, or -ENOENT if every slot is empty.
 */
static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
{
	__u64 key = NILFS_DIRECT_KEY_MAX + 1;

	/* scan downward: the first valid slot met is the last key */
	while (key-- > NILFS_DIRECT_KEY_MIN) {
		if (nilfs_direct_get_ptr(direct, key) !=
		    NILFS_BMAP_INVALID_PTR) {
			*keyp = key;
			return 0;
		}
	}
	return -ENOENT;
}
/* Non-zero when @key lies beyond the range a direct mapping can hold. */
static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key)
{
	return key > NILFS_DIRECT_KEY_MAX;
}
/*
 * Collect the valid (key, pointer) pairs among the first @nitems slots
 * (capped at NILFS_DIRECT_NBLOCKS) into @keys and @ptrs, in ascending key
 * order.  Returns the number of pairs stored.
 */
static int nilfs_direct_gather_data(struct nilfs_bmap *direct,
				    __u64 *keys, __u64 *ptrs, int nitems)
{
	__u64 key;
	int count = 0;

	if (nitems > NILFS_DIRECT_NBLOCKS)
		nitems = NILFS_DIRECT_NBLOCKS;

	for (key = 0; key < nitems; key++) {
		__u64 ptr = nilfs_direct_get_ptr(direct, key);

		if (ptr == NILFS_BMAP_INVALID_PTR)
			continue;
		keys[count] = key;
		ptrs[count] = ptr;
		count++;
	}
	return count;
}
/*
 * nilfs_direct_delete_and_convert - delete @key, then convert to a direct mapping
 * @bmap: mapping to convert (still using its current operations on entry)
 * @key: key to delete before converting
 * @keys: keys to carry over
 * @ptrs: pointers that belong to @keys
 * @n: number of entries in @keys and @ptrs
 *
 * Returns 0 on success or the negative error from the delete operation.
 * NOTE(review): the fill loop only compares against keys[j] in order, so
 * @keys is presumably sorted in ascending order - confirm with callers.
 */
int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
__u64 key, __u64 *keys, __u64 *ptrs, int n)
{
__le64 *dptrs;
int ret, i, j;
/* no need to allocate any resource for conversion */
/* delete */
ret = bmap->b_ops->bop_delete(bmap, key);
if (ret < 0)
return ret;
/* free resources */
if (bmap->b_ops->bop_clear != NULL)
bmap->b_ops->bop_clear(bmap);
/* convert */
dptrs = nilfs_direct_dptrs(bmap);
/* fill every slot: carried-over pointers where given, invalid elsewhere */
for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
if ((j < n) && (i == keys[j])) {
/* the deleted key is not carried over even if it is listed */
dptrs[i] = (i != key) ?
cpu_to_le64(ptrs[j]) :
NILFS_BMAP_INVALID_PTR;
j++;
} else
dptrs[i] = NILFS_BMAP_INVALID_PTR;
}
/* switch the mapping over to the direct operations */
nilfs_direct_init(bmap);
return 0;
}
/*
 * Propagate the dirty state of data buffer @bh to the DAT.
 *
 * Nothing is done for mappings that do not use virtual block numbers.  For
 * a volatile buffer the DAT entry of its pointer is only marked dirty.
 * Otherwise the DAT entry is updated, the buffer is marked volatile and the
 * pointer taken from the new request is stored back in the mapping.
 */
static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
				  struct buffer_head *bh)
{
	struct nilfs_palloc_req oldreq, newreq;
	struct inode *dat;
	__u64 key, ptr;
	int ret;

	if (!NILFS_BMAP_USE_VBN(bmap))
		return 0;

	dat = nilfs_bmap_get_dat(bmap);
	key = nilfs_bmap_data_get_key(bmap, bh);
	ptr = nilfs_direct_get_ptr(bmap, key);

	if (buffer_nilfs_volatile(bh))
		return nilfs_dat_mark_dirty(dat, ptr);

	oldreq.pr_entry_nr = ptr;
	newreq.pr_entry_nr = ptr;
	ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq);
	if (ret < 0)
		return ret;

	nilfs_dat_commit_update(dat, &oldreq, &newreq,
				bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
	set_buffer_nilfs_volatile(bh);
	nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr);
	return ret;
}
/*
 * Assign disk block @blocknr to virtual block number @ptr through the DAT
 * and record the virtual block number and block offset in @binfo.
 * Returns 0 on success or the error from nilfs_dat_prepare_start().
 */
static int nilfs_direct_assign_v(struct nilfs_bmap *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct inode *dat = nilfs_bmap_get_dat(direct);
	union nilfs_bmap_ptr_req req;
	int err;

	req.bpr_ptr = ptr;
	err = nilfs_dat_prepare_start(dat, &req.bpr_req);
	if (err)
		return err;

	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
	return err;
}
/*
 * Assign disk block @blocknr directly: store it in slot @key and record
 * the block offset, with level 0, in @binfo.  Always returns 0.
 */
static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	nilfs_direct_set_ptr(direct, key, blocknr);

	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = 0;
	return 0;
}
/*
 * Assign disk block @blocknr to the data buffer *@bh.  The key of the
 * buffer and its current pointer are validated first; the work is then
 * delegated to the virtual or the physical variant.
 *
 * Returns -EINVAL if the key is out of range or the slot is empty,
 * otherwise the result of the variant that was called.
 */
static int nilfs_direct_assign(struct nilfs_bmap *bmap,
			       struct buffer_head **bh,
			       sector_t blocknr,
			       union nilfs_binfo *binfo)
{
	__u64 key = nilfs_bmap_data_get_key(bmap, *bh);
	__u64 ptr;

	if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
		printk(KERN_CRIT "%s: invalid key: %llu\n", __func__,
		       (unsigned long long)key);
		return -EINVAL;
	}

	ptr = nilfs_direct_get_ptr(bmap, key);
	if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
		printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__,
		       (unsigned long long)ptr);
		return -EINVAL;
	}

	if (NILFS_BMAP_USE_VBN(bmap))
		return nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr,
					     binfo);
	return nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo);
}
/*
 * Operation table installed by nilfs_direct_init().  Operations that the
 * direct mapping does not provide are left NULL explicitly.
 */
static const struct nilfs_bmap_operations nilfs_direct_ops = {
	/* lookup */
	.bop_lookup			= nilfs_direct_lookup,
	.bop_lookup_contig		= nilfs_direct_lookup_contig,
	.bop_last_key			= nilfs_direct_last_key,

	/* modification */
	.bop_insert			= nilfs_direct_insert,
	.bop_delete			= nilfs_direct_delete,
	.bop_check_insert		= nilfs_direct_check_insert,
	.bop_gather_data		= nilfs_direct_gather_data,

	/* segment construction */
	.bop_propagate			= nilfs_direct_propagate,
	.bop_assign			= nilfs_direct_assign,

	/* not implemented for the direct mapping */
	.bop_clear			= NULL,
	.bop_lookup_dirty_buffers	= NULL,
	.bop_mark			= NULL,
	.bop_check_delete		= NULL,
};
/*
 * nilfs_direct_init - make a bmap use the direct mapping operations
 * @bmap: bmap to set up
 *
 * Points @bmap->b_ops at nilfs_direct_ops.  Always returns 0.
 */
int nilfs_direct_init(struct nilfs_bmap *bmap)
{
bmap->b_ops = &nilfs_direct_ops;
return 0;
}

51
fs/nilfs2/direct.h Normal file
View file

@ -0,0 +1,51 @@
/*
* direct.h - NILFS direct block pointer.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_DIRECT_H
#define _NILFS_DIRECT_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include "bmap.h"
/**
* struct nilfs_direct_node - direct node
* @dn_flags: flags
* @pad: padding (brings the structure to 8 bytes)
*/
struct nilfs_direct_node {
__u8 dn_flags;
__u8 pad[7];
};
/*
 * Number of 64-bit pointer slots in a direct mapping: the count of __le64
 * values that fit in NILFS_BMAP_SIZE, minus one.
 * NOTE(review): the excluded slot presumably holds struct nilfs_direct_node
 * (8 bytes) -- confirm against direct.c.
 */
#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
/* Smallest and largest key accepted by the direct mapping */
#define NILFS_DIRECT_KEY_MIN 0
#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
/* Entry points exported by direct.c */
int nilfs_direct_init(struct nilfs_bmap *);
int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *,
__u64 *, int);
#endif /* _NILFS_DIRECT_H */

25
fs/nilfs2/export.h Normal file
View file

@ -0,0 +1,25 @@
#ifndef NILFS_EXPORT_H
#define NILFS_EXPORT_H
#include <linux/exportfs.h>
extern const struct export_operations nilfs_export_ops;
/**
* struct nilfs_fid - NILFS file id type
* @cno: checkpoint number
* @ino: inode number
* @gen: file generation (version) for NFS
* @parent_gen: parent generation (version) for NFS
* @parent_ino: parent inode number
*
* The structure is declared packed, so the members are laid out back to
* back in the order listed above with no padding.
*/
struct nilfs_fid {
u64 cno;
u64 ino;
u32 gen;
u32 parent_gen;
u64 parent_ino;
} __attribute__ ((packed));
#endif

174
fs/nilfs2/file.c Normal file
View file

@ -0,0 +1,174 @@
/*
* file.c - NILFS regular file handling primitives including fsync().
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Amagai Yoshiji <amagai@osrg.net>,
* Ryusuke Konishi <ryusuke@osrg.net>
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/writeback.h>
#include "nilfs.h"
#include "segment.h"
/*
 * nilfs_sync_file - fsync operation of nilfs regular files
 * @file: file to synchronize
 * @start: start of the range passed to filemap_write_and_wait_range()
 * @end: end of the range passed to filemap_write_and_wait_range()
 * @datasync: non-zero to construct a data-sync segment for this inode only
 *
 * Called from the fsync() system call.  The page cache range is written back
 * and waited on first.  Then, with i_mutex held, a segment is constructed if
 * the inode is dirty: a dsync segment over the range 0..LLONG_MAX when
 * @datasync is set, a regular segment otherwise.  When no error occurred the
 * device is finally flushed with nilfs_flush_device().
 *
 * Return Value: 0 on success, or the first error reported by one of the
 * steps above.
 */
int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct the_nilfs *nilfs;
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (err)
		return err;

	mutex_lock(&inode->i_mutex);
	if (nilfs_inode_dirty(inode)) {
		err = datasync ?
			nilfs_construct_dsync_segment(inode->i_sb, inode,
						      0, LLONG_MAX) :
			nilfs_construct_segment(inode->i_sb);
	}
	mutex_unlock(&inode->i_mutex);

	if (err)
		return err;

	nilfs = inode->i_sb->s_fs_info;
	return nilfs_flush_device(nilfs);
}
/*
 * nilfs_page_mkwrite - page_mkwrite operation of nilfs_file_vm_ops
 * @vma: memory area of the faulting mapping; @vma->vm_file is the nilfs file
 * @vmf: fault descriptor; @vmf->page is the page about to become writable
 *
 * Returns VM_FAULT_SIGBUS when nilfs_near_disk_full() reports true.
 * Otherwise the internal error code is converted with
 * block_page_mkwrite_return().  If the page is already mapped to disk
 * (PG_mappedtodisk set, or every buffer of the page mapped), no transaction
 * is started and the page is returned still locked after
 * wait_for_stable_page().  Otherwise the page is unlocked and the hole
 * blocks are filled by __block_page_mkwrite() inside a nilfs transaction.
 */
static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct nilfs_transaction_info ti;
int ret = 0;
if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
return VM_FAULT_SIGBUS; /* -ENOSPC */
sb_start_pagefault(inode->i_sb);
lock_page(page);
/* the page was truncated, moved beyond i_size, or is not up to date */
if (page->mapping != inode->i_mapping ||
page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
unlock_page(page);
ret = -EFAULT; /* make the VM retry the fault */
goto out;
}
/*
* check to see if the page is mapped already (no holes)
*/
if (PageMappedToDisk(page))
goto mapped;
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
int fully_mapped = 1;
/* walk the circular buffer list; stop at the first unmapped buffer */
bh = head = page_buffers(page);
do {
if (!buffer_mapped(bh)) {
fully_mapped = 0;
break;
}
} while (bh = bh->b_this_page, bh != head);
if (fully_mapped) {
/* remember the result so later faults take the short path above */
SetPageMappedToDisk(page);
goto mapped;
}
}
unlock_page(page);
/*
* fill hole blocks
*/
ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
/* never returns -ENOMEM, but may return -ENOSPC */
if (unlikely(ret))
goto out;
file_update_time(vma->vm_file);
ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
if (ret) {
nilfs_transaction_abort(inode->i_sb);
goto out;
}
/* account one page worth of blocks as dirty before committing */
nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
nilfs_transaction_commit(inode->i_sb);
mapped:
wait_for_stable_page(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
}
/*
 * VM operations installed by nilfs_file_mmap().  Only page_mkwrite is
 * nilfs specific; the other entries are the generic helpers.
 */
static const struct vm_operations_struct nilfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = nilfs_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
/*
 * nilfs_file_mmap - mmap operation of nilfs regular files
 * @file: file being mapped
 * @vma: memory area describing the new mapping
 *
 * Calls file_accessed() on @file and installs nilfs_file_vm_ops on @vma.
 * Always returns 0.
 */
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
vma->vm_ops = &nilfs_file_vm_ops;
return 0;
}
/*
* File operations of nilfs regular files.
*
* Most entries are left NULL or point at generic helpers: the current
* defaults are fine for the nilfs filesystem.  The nilfs-specific entries
* are the ioctl handlers, mmap and fsync.
*/
const struct file_operations nilfs_file_operations = {
.llseek = generic_file_llseek,
.read = new_sync_read,
.write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nilfs_compat_ioctl,
#endif /* CONFIG_COMPAT */
.mmap = nilfs_file_mmap,
.open = generic_file_open,
/* .release = nilfs_release_file, */
.fsync = nilfs_sync_file,
.splice_read = generic_file_splice_read,
};
/* Inode operations of nilfs regular files */
const struct inode_operations nilfs_file_inode_operations = {
.setattr = nilfs_setattr,
.permission = nilfs_permission,
.fiemap = nilfs_fiemap,
};
/* end of file */

198
fs/nilfs2/gcinode.c Normal file
View file

@ -0,0 +1,198 @@
/*
* gcinode.c - dummy inodes to buffer blocks for garbage collection
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>,
* and Ryusuke Konishi <ryusuke@osrg.net>.
* Revised by Ryusuke Konishi <ryusuke@osrg.net>.
*
*/
/*
* This file adds the cache of on-disk blocks to be moved in garbage
* collection. The disk blocks are held with dummy inodes (called
* gcinodes), and this file provides lookup function of the dummy
* inodes and their buffer read function.
*
* Buffers and pages held by the dummy inodes will be released each
* time after they are copied to a new log. Dirty blocks made on the
* current generation and the blocks to be moved by GC never overlap
* because the dirty blocks make a new generation; they rather must be
* written individually.
*/
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include "nilfs.h"
#include "btree.h"
#include "btnode.h"
#include "page.h"
#include "mdt.h"
#include "dat.h"
#include "ifile.h"
/*
 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
 * @inode - gc inode
 * @blkoff - dummy offset treated as the key for the page cache
 * @pbn - physical block number of the block
 * @vbn - virtual block number of the block, 0 for non-virtual block
 * @out_bh - indirect pointer to a buffer_head struct to receive the results
 *
 * Description: nilfs_gccache_submit_read_data() registers the data buffer
 * specified by @pbn to the GC pagecache with the key @blkoff.  If @pbn is
 * zero, it is obtained by translating @vbn through the DAT file first.
 * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
 * @out_bh is assigned only when zero is returned; in that case the caller
 * owns the buffer reference obtained from nilfs_grab_buffer().
 *
 * Return Value: On success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The block specified with @pbn does not exist.
 */
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
				   sector_t pbn, __u64 vbn,
				   struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	int err;

	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		return -ENOMEM;

	if (buffer_uptodate(bh))
		goto out;

	if (pbn == 0) {
		struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

		err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
		/*
		 * Do not drop the buffer reference here: bh->b_page is still
		 * dereferenced at the "failed" label below.
		 */
		if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
			goto failed;
	}

	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}

	if (!buffer_mapped(bh)) {
		bh->b_bdev = inode->i_sb->s_bdev;
		set_buffer_mapped(bh);
	}
	/* read from the physical location ... */
	bh->b_blocknr = pbn;
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(READ, bh);
	/* ... but keep the virtual block number in the buffer if there is one */
	if (vbn)
		bh->b_blocknr = vbn;
 out:
	err = 0;
	*out_bh = bh;

 failed:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	/*
	 * Release the buffer only after the last access to bh->b_page.  Once
	 * the reference is dropped and the page is unlocked, the buffer head
	 * may be freed, so releasing it earlier risks a use-after-free.
	 */
	if (unlikely(err))
		brelse(bh);
	return err;
}
/*
 * nilfs_gccache_submit_read_node() - add node buffer and submit read request
 * @inode - gc inode
 * @pbn - physical block number for the block
 * @vbn - virtual block number for the block
 * @out_bh - indirect pointer to a buffer_head struct to receive the results
 *
 * Description: nilfs_gccache_submit_read_node() registers the node buffer
 * in the btnode cache of the GC inode, keyed by @vbn (or by @pbn when @vbn
 * is zero).  @pbn can be supplied by the caller to avoid translation of the
 * disk block address.  A cache hit, reported internally as -EEXIST, is
 * treated as success.
 *
 * Return Value: On success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
				   __u64 vbn, struct buffer_head **out_bh)
{
	__u64 blocknr = vbn ? vbn : pbn;
	int err;

	err = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
					blocknr, pbn, READ, out_bh, &pbn);

	/* -EEXIST is an internal code meaning "cache hit" */
	return err == -EEXIST ? 0 : err;
}
/*
 * nilfs_gccache_wait_and_mark_dirty - wait for a GC buffer and dirty it
 * @bh: buffer submitted for read
 *
 * Waits for @bh and marks it dirty unless one of the checks below fails.
 *
 * Return Value: 0 on success, or one of the following negative error codes.
 *
 * %-EIO - the buffer is not up to date after waiting, or it is a node
 * buffer that nilfs_btree_broken_node_block() rejects (the uptodate flag is
 * cleared in that case).
 *
 * %-EEXIST - the buffer is already dirty.
 */
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
	wait_on_buffer(bh);

	if (!buffer_uptodate(bh))
		return -EIO;

	if (buffer_dirty(bh))
		return -EEXIST;

	if (buffer_nilfs_node(bh)) {
		if (nilfs_btree_broken_node_block(bh)) {
			clear_buffer_uptodate(bh);
			return -EIO;
		}
	}

	mark_buffer_dirty(bh);
	return 0;
}
int nilfs_init_gcinode(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
inode->i_mapping->a_ops = &empty_aops;
inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);
return 0;
}
/**
* nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes
*/
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
{
struct list_head *head = &nilfs->ns_gc_inodes;
struct nilfs_inode_info *ii;
while (!list_empty(head)) {
ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
list_del_init(&ii->i_dirty);
truncate_inode_pages(&ii->vfs_inode.i_data, 0);
nilfs_btnode_cache_clear(&ii->i_btnode_cache);
iput(&ii->vfs_inode);
}
}

227
fs/nilfs2/ifile.c Normal file
View file

@ -0,0 +1,227 @@
/*
* ifile.c - NILFS inode file
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Amagai Yoshiji <amagai@osrg.net>.
* Revised by Ryusuke Konishi <ryusuke@osrg.net>.
*
*/
#include <linux/types.h>
#include <linux/buffer_head.h>
#include "nilfs.h"
#include "mdt.h"
#include "alloc.h"
#include "ifile.h"
/**
* struct nilfs_ifile_info - on-memory private data of ifile
* @mi: on-memory private data of metadata file
* @palloc_cache: persistent object allocator cache of ifile
*
* @mi is the first member; NILFS_IFILE_I() obtains this structure by
* casting the pointer returned by NILFS_MDT().
*/
struct nilfs_ifile_info {
struct nilfs_mdt_info mi;
struct nilfs_palloc_cache palloc_cache;
};
/*
 * NILFS_IFILE_I - get the ifile private data of an ifile inode
 * @ifile: ifile inode
 *
 * Returns the pointer obtained from NILFS_MDT() cast to
 * struct nilfs_ifile_info.
 */
static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
{
return (struct nilfs_ifile_info *)NILFS_MDT(ifile);
}
/**
 * nilfs_ifile_create_inode - create a new disk inode
 * @ifile: ifile inode
 * @out_ino: pointer to a variable to store inode number
 * @out_bh: buffer_head contains newly allocated disk inode
 *
 * Allocates an entry in @ifile, gets (creating if needed) the block that
 * holds it, commits the allocation and marks both the block and the ifile
 * dirty.
 *
 * Return Value: On success, 0 is returned, the newly allocated inode
 * number is stored in the place pointed by @out_ino, and the buffer_head
 * that contains the newly allocated disk inode structure is stored in the
 * place pointed by @out_bh.
 * On error, one of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOSPC - No inode left.
 */
int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
			     struct buffer_head **out_bh)
{
	struct nilfs_palloc_req req;
	int err;

	/* entry number 0 means: find a free inode from the start of a group */
	req.pr_entry_nr = 0;
	req.pr_entry_bh = NULL;

	err = nilfs_palloc_prepare_alloc_entry(ifile, &req);
	if (!err) {
		err = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
						   &req.pr_entry_bh);
		if (err < 0)
			nilfs_palloc_abort_alloc_entry(ifile, &req);
	}
	if (err < 0) {
		brelse(req.pr_entry_bh);
		return err;
	}

	nilfs_palloc_commit_alloc_entry(ifile, &req);
	mark_buffer_dirty(req.pr_entry_bh);
	nilfs_mdt_mark_dirty(ifile);

	/* the buffer reference is handed over to the caller */
	*out_ino = (ino_t)req.pr_entry_nr;
	*out_bh = req.pr_entry_bh;
	return 0;
}
/**
 * nilfs_ifile_delete_inode - delete a disk inode
 * @ifile: ifile inode
 * @ino: inode number
 *
 * Clears the i_flags field of the on-disk inode, marks its block dirty and
 * frees the entry in @ifile.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The inode number @ino has not been allocated.
 */
int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
{
	struct nilfs_palloc_req req = {
		.pr_entry_nr = ino,
		.pr_entry_bh = NULL,
	};
	struct nilfs_inode *disk_inode;
	void *kaddr;
	int err;

	err = nilfs_palloc_prepare_free_entry(ifile, &req);
	if (!err) {
		err = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0,
						   &req.pr_entry_bh);
		if (err < 0)
			nilfs_palloc_abort_free_entry(ifile, &req);
	}
	if (err < 0) {
		brelse(req.pr_entry_bh);
		return err;
	}

	kaddr = kmap_atomic(req.pr_entry_bh->b_page);
	disk_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
						  req.pr_entry_bh, kaddr);
	disk_inode->i_flags = 0;
	kunmap_atomic(kaddr);

	mark_buffer_dirty(req.pr_entry_bh);
	brelse(req.pr_entry_bh);

	nilfs_palloc_commit_free_entry(ifile, &req);
	return 0;
}
/*
 * nilfs_ifile_get_inode_block - get the ifile block holding a disk inode
 * @ifile: ifile inode
 * @ino: inode number
 * @out_bh: place to store the buffer_head of the block
 *
 * Return Value: 0 on success.  -EINVAL is returned (after nilfs_error())
 * when NILFS_VALID_INODE() rejects @ino; otherwise the error from
 * nilfs_palloc_get_entry_block() is returned after a warning is logged.
 */
int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
				struct buffer_head **out_bh)
{
	struct super_block *sb = ifile->i_sb;
	int ret;

	if (unlikely(!NILFS_VALID_INODE(sb, ino))) {
		nilfs_error(sb, __func__, "bad inode number: %lu",
			    (unsigned long) ino);
		return -EINVAL;
	}

	ret = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
	if (likely(!ret))
		return 0;

	nilfs_warning(sb, __func__, "unable to read inode: %lu",
		      (unsigned long) ino);
	return ret;
}
/**
 * nilfs_ifile_count_free_inodes - calculate free inodes count
 * @ifile: ifile inode
 * @nmaxinodes: current maximum of available inodes count [out]
 * @nfreeinodes: free inodes count [out]
 *
 * Both outputs are zeroed first.  The number of inodes in use is read from
 * the root object of @ifile; @nfreeinodes is the maximum minus that number.
 *
 * Return Value: 0 on success, otherwise the error returned by
 * nilfs_palloc_count_max_entries(), in which case @nfreeinodes stays 0.
 */
int nilfs_ifile_count_free_inodes(struct inode *ifile,
				  u64 *nmaxinodes, u64 *nfreeinodes)
{
	u64 nused;
	int ret;

	*nfreeinodes = 0;
	*nmaxinodes = 0;

	nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count);
	ret = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes);
	if (unlikely(ret))
		return ret;

	*nfreeinodes = *nmaxinodes - nused;
	return 0;
}
/**
 * nilfs_ifile_read - read or get ifile inode
 * @sb: super block instance
 * @root: root object
 * @inode_size: size of an inode
 * @raw_inode: on-disk ifile inode
 * @inodep: buffer to store the inode
 *
 * Gets the ifile inode with nilfs_iget_locked().  A newly created inode
 * (I_NEW set) is initialized as a metadata file with a persistent object
 * allocator, filled from @raw_inode and unlocked; an existing inode is
 * returned as is.
 *
 * Return Value: 0 on success with the inode stored in @inodep.  -ENOMEM if
 * the inode cannot be obtained, or the error of the failing initialization
 * step, in which case iget_failed() is called on the inode.
 */
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
		     size_t inode_size, struct nilfs_inode *raw_inode,
		     struct inode **inodep)
{
	struct inode *ifile;
	int err;

	ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO);
	if (unlikely(!ifile))
		return -ENOMEM;

	if (ifile->i_state & I_NEW) {
		err = nilfs_mdt_init(ifile, NILFS_MDT_GFP,
				     sizeof(struct nilfs_ifile_info));
		if (err)
			goto failed;

		err = nilfs_palloc_init_blockgroup(ifile, inode_size);
		if (err)
			goto failed;

		nilfs_palloc_setup_cache(ifile,
					 &NILFS_IFILE_I(ifile)->palloc_cache);

		err = nilfs_read_inode_common(ifile, raw_inode);
		if (err)
			goto failed;

		unlock_new_inode(ifile);
	}

	*inodep = ifile;
	return 0;

 failed:
	iget_failed(ifile);
	return err;
}

58
fs/nilfs2/ifile.h Normal file
View file

@ -0,0 +1,58 @@
/*
* ifile.h - NILFS inode file
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Amagai Yoshiji <amagai@osrg.net>
* Revised by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#ifndef _NILFS_IFILE_H
#define _NILFS_IFILE_H
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
#include "mdt.h"
#include "alloc.h"
/*
 * nilfs_ifile_map_inode - map the on-disk inode stored in an ifile block
 * @ifile: ifile inode
 * @ino: inode number
 * @ibh: buffer of the ifile block that holds the inode
 *
 * kmap()s the page of @ibh and returns the entry for @ino located by
 * nilfs_palloc_block_get_entry().  The mapping is dropped by
 * nilfs_ifile_unmap_inode(), which kunmap()s the same page.
 */
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
{
void *kaddr = kmap(ibh->b_page);
return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
}
/*
 * nilfs_ifile_unmap_inode - undo the mapping of nilfs_ifile_map_inode()
 * @ifile: ifile inode (not referenced)
 * @ino: inode number (not referenced)
 * @ibh: buffer whose page is to be kunmap()ed
 */
static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
struct buffer_head *ibh)
{
kunmap(ibh->b_page);
}
int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
int nilfs_ifile_delete_inode(struct inode *, ino_t);
int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
size_t inode_size, struct nilfs_inode *raw_inode,
struct inode **inodep);
#endif /* _NILFS_IFILE_H */

1136
fs/nilfs2/inode.c Normal file

File diff suppressed because it is too large Load diff

1379
fs/nilfs2/ioctl.c Normal file

File diff suppressed because it is too large Load diff

600
fs/nilfs2/mdt.c Normal file
View file

@ -0,0 +1,600 @@
/*
* mdt.c - meta data file for NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*/
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/mm.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
/*
 * Number of blocks following the requested one for which
 * nilfs_mdt_read_block() submits read-ahead requests.
 */
#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1)
static int
nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
struct buffer_head *bh,
void (*init_block)(struct inode *,
struct buffer_head *, void *))
{
struct nilfs_inode_info *ii = NILFS_I(inode);
void *kaddr;
int ret;
/* Caller exclude read accesses using page lock */
/* set_buffer_new(bh); */
bh->b_blocknr = 0;
ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
if (unlikely(ret))
return ret;
set_buffer_mapped(bh);
kaddr = kmap_atomic(bh->b_page);
memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
if (init_block)
init_block(inode, bh, kaddr);
flush_dcache_page(bh->b_page);
kunmap_atomic(kaddr);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
return 0;
}
/*
 * nilfs_mdt_create_block - create a new block on a metadata file
 * @inode: inode of the meta data file
 * @block: block offset
 * @out_bh: place to store the buffer_head of the new block
 * @init_block: optional initializer passed to nilfs_mdt_insert_new_block()
 *
 * The whole operation runs inside a nilfs transaction, which is committed
 * on success and aborted on failure.
 *
 * Return Value: 0 on success, with @out_bh holding its own reference to the
 * buffer.  -ENOMEM if no buffer could be grabbed, -EEXIST if the buffer is
 * already up to date (checked before and after waiting on it), or the error
 * returned by nilfs_mdt_insert_new_block() or nilfs_transaction_commit().
 */
static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
struct buffer_head **out_bh,
void (*init_block)(struct inode *,
struct buffer_head *,
void *))
{
struct super_block *sb = inode->i_sb;
struct nilfs_transaction_info ti;
struct buffer_head *bh;
int err;
/* NOTE(review): the return value of nilfs_transaction_begin() is not checked */
nilfs_transaction_begin(sb, &ti, 0);
err = -ENOMEM;
bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
if (unlikely(!bh))
goto failed_unlock;
err = -EEXIST;
if (buffer_uptodate(bh))
goto failed_bh;
/* re-check after waiting: the buffer may have become up to date meanwhile */
wait_on_buffer(bh);
if (buffer_uptodate(bh))
goto failed_bh;
bh->b_bdev = sb->s_bdev;
err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
if (likely(!err)) {
/* extra reference for the caller; the one from grabbing is dropped below */
get_bh(bh);
*out_bh = bh;
}
failed_bh:
unlock_page(bh->b_page);
page_cache_release(bh->b_page);
brelse(bh);
failed_unlock:
if (likely(!err))
err = nilfs_transaction_commit(sb);
else
nilfs_transaction_abort(sb);
return err;
}
/*
 * nilfs_mdt_submit_block - submit a read request for a metadata file block
 * @inode: inode of the meta data file
 * @blkoff: block offset
 * @mode: READ or READA
 * @out_bh: place to store the buffer_head
 *
 * Return Value: 0 if a read was submitted, or -EEXIST (internal code) if
 * the buffer is already up to date; in both cases @out_bh receives the
 * buffer with a reference held for the caller.  On failure @out_bh is not
 * assigned and one of the following is returned: -ENOMEM if no buffer could
 * be grabbed, -EBUSY if @mode is READA and the buffer lock is not
 * immediately available, or the error returned by nilfs_bmap_lookup().
 */
static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
int mode, struct buffer_head **out_bh)
{
struct buffer_head *bh;
__u64 blknum = 0;
int ret = -ENOMEM;
bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
if (unlikely(!bh))
goto failed;
ret = -EEXIST; /* internal code */
if (buffer_uptodate(bh))
goto out;
if (mode == READA) {
/* read-ahead must not block on the buffer lock */
if (!trylock_buffer(bh)) {
ret = -EBUSY;
goto failed_bh;
}
} else /* mode == READ */
lock_buffer(bh);
/* re-check under the buffer lock; ret is still -EEXIST here */
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
goto out;
}
ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
if (unlikely(ret)) {
unlock_buffer(bh);
goto failed_bh;
}
map_bh(bh, inode->i_sb, (sector_t)blknum);
bh->b_end_io = end_buffer_read_sync;
/* reference held across the submitted read */
get_bh(bh);
submit_bh(mode, bh);
ret = 0;
out:
/* reference handed to the caller; the one from grabbing is dropped below */
get_bh(bh);
*out_bh = bh;
failed_bh:
unlock_page(bh->b_page);
page_cache_release(bh->b_page);
brelse(bh);
failed:
return ret;
}
/*
 * nilfs_mdt_read_block - read a block of a metadata file
 * @inode: inode of the meta data file
 * @block: block offset
 * @readahead: non-zero to also submit read-ahead requests for up to
 *	NILFS_MDT_MAX_RA_BLOCKS following blocks
 * @out_bh: place to store the buffer_head of @block
 *
 * Return Value: 0 on success with @out_bh set (also when the buffer was
 * already up to date).  -EIO if the buffer is not up to date after the read
 * finished, or the error returned by nilfs_mdt_submit_block() for @block.
 * Errors of the read-ahead requests are not reported.
 */
static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
int readahead, struct buffer_head **out_bh)
{
struct buffer_head *first_bh, *bh;
unsigned long blkoff;
int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
int err;
err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
if (err == -EEXIST) /* internal code */
goto out;
if (unlikely(err))
goto failed;
if (readahead) {
blkoff = block + 1;
for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
/* only the submission matters; the read-ahead buffer is not kept */
if (likely(!err || err == -EEXIST))
brelse(bh);
else if (err != -EBUSY)
break;
/* abort readahead if bmap lookup failed */
/* stop early once the requested block is no longer locked */
if (!buffer_locked(first_bh))
goto out_no_wait;
}
}
wait_on_buffer(first_bh);
out_no_wait:
err = -EIO;
if (!buffer_uptodate(first_bh))
goto failed_bh;
out:
*out_bh = first_bh;
return 0;
failed_bh:
brelse(first_bh);
failed:
return err;
}
/**
 * nilfs_mdt_get_block - read or create a buffer on meta data file.
 * @inode: inode of the meta data file
 * @blkoff: block offset
 * @create: create flag
 * @init_block: initializer used for newly allocated block
 * @out_bh: output of a pointer to the buffer_head
 *
 * nilfs_mdt_get_block() looks up the specified buffer and tries to create
 * a new buffer if @create is not zero.  On success, the returned buffer is
 * assured to be either existing or formatted using a buffer lock.
 * @out_bh is substituted only when zero is returned.
 *
 * Read-ahead is requested only when @create is zero.  If the creation
 * reports -EEXIST, the read is attempted again.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EROFS - Read only filesystem (for create mode)
 */
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
			void (*init_block)(struct inode *,
					   struct buffer_head *, void *),
			struct buffer_head **out_bh)
{
	int ret;

	/* Should be rewritten with merging nilfs_mdt_read_block() */
	for (;;) {
		ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
		if (!create || ret != -ENOENT)
			return ret;

		ret = nilfs_mdt_create_block(inode, blkoff, out_bh,
					     init_block);
		/* the number of read-create retries is not limited */
		if (likely(ret != -EEXIST))
			return ret;
	}
}
/**
* nilfs_mdt_delete_block - make a hole on the meta data file.
* @inode: inode of the meta data file
* @block: block offset
*
* Return Value: On success, zero is returned.
* On error, one of the following negative error code is returned.
*
* %-ENOMEM - Insufficient memory available.
*
* %-EIO - I/O error
*/
int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
int err;
err = nilfs_bmap_delete(ii->i_bmap, block);
if (!err || err == -ENOENT) {
nilfs_mdt_mark_dirty(inode);
nilfs_mdt_forget_block(inode, block);
}
return err;
}
/**
 * nilfs_mdt_forget_block - discard dirty state and try to remove the page
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
 * tries to release the page including the buffer from a page cache.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-EBUSY - page has an active buffer.
 *
 * %-ENOENT - page cache has no page addressed by the offset.
 */
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
	unsigned int shift = PAGE_CACHE_SHIFT - inode->i_blkbits;
	pgoff_t index = (pgoff_t)block >> shift;
	struct page *page;
	int still_dirty;

	page = find_lock_page(inode->i_mapping, index);
	if (!page)
		return -ENOENT;

	wait_on_page_writeback(page);

	if (page_has_buffers(page)) {
		/* offset of the first block held by this page */
		unsigned long first_block = (unsigned long)index << shift;
		struct buffer_head *bh;

		bh = nilfs_page_get_nth_block(page, block - first_block);
		nilfs_forget_buffer(bh);
	}

	still_dirty = PageDirty(page);
	unlock_page(page);
	page_cache_release(page);

	/* a page that is still dirty is kept; invalidation is not attempted */
	if (still_dirty)
		return -EBUSY;
	if (invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
		return -EBUSY;
	return 0;
}
/**
 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty.
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * Reads the block without read-ahead, then marks its buffer and the
 * metadata file dirty.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 */
int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
{
	struct buffer_head *bh;
	int ret;

	ret = nilfs_mdt_read_block(inode, block, 0, &bh);
	if (likely(!ret)) {
		mark_buffer_dirty(bh);
		nilfs_mdt_mark_dirty(inode);
		brelse(bh);
	}
	return ret;
}
/*
 * nilfs_mdt_fetch_dirty - collect the dirty state of a metadata file
 * @inode: inode of the meta data file
 *
 * If the bmap was dirty (its dirty state is cleared by the test), the
 * NILFS_I_DIRTY bit of the inode is set and 1 is returned.  Otherwise the
 * current value of the NILFS_I_DIRTY bit is returned.
 */
int nilfs_mdt_fetch_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!nilfs_bmap_test_and_clear_dirty(ii->i_bmap))
		return test_bit(NILFS_I_DIRTY, &ii->i_state);

	set_bit(NILFS_I_DIRTY, &ii->i_state);
	return 1;
}
/*
 * nilfs_mdt_write_page - writepage operation of metadata files
 * @page: page to be written
 * @wbc: writeback control
 *
 * The page is not written here; it is redirtied and unlocked, and the
 * segment constructor is invoked instead: nilfs_construct_segment() for
 * WB_SYNC_ALL writeback, nilfs_flush_segment() for reclaim.  On a read-only
 * filesystem the dirty page is discarded.
 *
 * Return Value: 0, the result of nilfs_construct_segment(), or -EROFS when
 * the filesystem is mounted read-only.
 */
static int
nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb;

	if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
		/*
		 * It means that filesystem was remounted in read-only
		 * mode because of error or metadata corruption. But we
		 * have dirty pages that try to be flushed in background.
		 * So, here we simply discard this dirty page.
		 */
		nilfs_clear_dirty_page(page, false);
		unlock_page(page);
		return -EROFS;
	}

	redirty_page_for_writepage(wbc, page);
	unlock_page(page);

	if (!inode)
		return 0;

	sb = inode->i_sb;
	if (wbc->sync_mode == WB_SYNC_ALL)
		return nilfs_construct_segment(sb);

	if (wbc->for_reclaim)
		nilfs_flush_segment(sb, inode->i_ino);
	return 0;
}
/*
 * Default operation tables installed on metadata file inodes by
 * nilfs_mdt_init().  Only writepage is provided; the inode and file
 * operation tables are left empty.
 */
static const struct address_space_operations def_mdt_aops = {
.writepage = nilfs_mdt_write_page,
};
static const struct inode_operations def_mdt_iops;
static const struct file_operations def_mdt_fops;
/*
 * nilfs_mdt_init - set up an inode as a metadata file
 * @inode: inode of the metadata file
 * @gfp_mask: gfp mask installed on the page cache mapping of @inode
 * @objsz: size of the private data to allocate; at least
 *	sizeof(struct nilfs_mdt_info) is always allocated
 *
 * Allocates the zeroed private data, stores it in @inode->i_private and
 * installs the default metadata file operations on the inode and its
 * mapping.
 *
 * Return Value: 0 on success, -ENOMEM if the allocation fails.
 */
int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
{
	struct address_space *mapping = inode->i_mapping;
	struct nilfs_mdt_info *mi;

	mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
	if (!mi)
		return -ENOMEM;

	init_rwsem(&mi->mi_sem);
	inode->i_private = mi;

	inode->i_mode = S_IFREG;
	inode->i_op = &def_mdt_iops;
	inode->i_fop = &def_mdt_fops;

	mapping_set_gfp_mask(mapping, gfp_mask);
	mapping->backing_dev_info = inode->i_sb->s_bdi;
	mapping->a_ops = &def_mdt_aops;

	return 0;
}
/*
 * nilfs_mdt_set_entry_size - record the entry layout of a metadata file
 * @inode: inode of the metadata file
 * @entry_size: size of one entry in bytes
 * @header_size: size of the header in bytes
 *
 * Stores the entry size, the number of entries that fit in one block, and
 * the header size rounded up to whole entries in the metadata file info.
 */
void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
			      unsigned header_size)
{
	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
	unsigned block_size = 1 << inode->i_blkbits;

	mi->mi_entry_size = entry_size;
	mi->mi_entries_per_block = block_size / entry_size;
	mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
/**
* nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
* @inode: inode of the metadata file
* @shadow: shadow mapping
*/
int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct nilfs_shadow_map *shadow)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
struct backing_dev_info *bdi = inode->i_sb->s_bdi;
INIT_LIST_HEAD(&shadow->frozen_buffers);
address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
mi->mi_shadow = shadow;
return 0;
}
/**
* nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
* @inode: inode of the metadata file
*/
int nilfs_mdt_save_to_shadow_map(struct inode *inode)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
struct nilfs_inode_info *ii = NILFS_I(inode);
struct nilfs_shadow_map *shadow = mi->mi_shadow;
int ret;
ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping);
if (ret)
goto out;
ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes,
&ii->i_btnode_cache);
if (ret)
goto out;
nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store);
out:
return ret;
}
/*
 * nilfs_mdt_freeze_buffer - keep a frozen copy of @bh in the shadow map
 * @inode: inode of the metadata file
 * @bh: buffer to freeze
 *
 * Looks up (creating if needed) the page with the same index in the shadow
 * data cache and copies @bh into the matching buffer unless that buffer is
 * already up to date.  A buffer that is not yet on the frozen list is queued
 * there and @bh is flagged as redirected; otherwise the buffer obtained here
 * is simply released again.
 *
 * Returns 0 on success or -ENOMEM if the shadow page cannot be grabbed.
 */
int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
{
	struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
	const int blkbits = inode->i_blkbits;
	struct buffer_head *frozen;
	struct page *page;

	page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
	if (!page)
		return -ENOMEM;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, 0);

	frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
	if (!buffer_uptodate(frozen))
		nilfs_copy_buffer(frozen, bh);

	if (!list_empty(&frozen->b_assoc_buffers)) {
		/* already frozen */
		brelse(frozen);
	} else {
		list_add_tail(&frozen->b_assoc_buffers,
			      &shadow->frozen_buffers);
		set_buffer_nilfs_redirected(bh);
	}

	unlock_page(page);
	page_cache_release(page);
	return 0;
}
/*
 * nilfs_mdt_get_frozen_buffer - look up the frozen counterpart of @bh
 * @inode: inode of the metadata file
 * @bh: original buffer
 *
 * Returns the buffer at the same page index and in-page position in the
 * shadow data cache, or NULL if that page is not cached or has no buffers.
 */
struct buffer_head *
nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
{
	struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
	struct buffer_head *found = NULL;
	struct page *page;

	page = find_lock_page(&shadow->frozen_data, bh->b_page->index);
	if (!page)
		return NULL;

	if (page_has_buffers(page)) {
		int nth = bh_offset(bh) >> inode->i_blkbits;

		found = nilfs_page_get_nth_block(page, nth);
	}

	unlock_page(page);
	page_cache_release(page);
	return found;
}
static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow)
{
struct list_head *head = &shadow->frozen_buffers;
struct buffer_head *bh;
while (!list_empty(head)) {
bh = list_first_entry(head, struct buffer_head,
b_assoc_buffers);
list_del_init(&bh->b_assoc_buffers);
brelse(bh); /* drop ref-count to make it releasable */
}
}
/**
 * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state
 * @inode: inode of the metadata file
 *
 * Under the write side of mi_sem: clears the persistent allocator cache if
 * one is attached, discards the current dirty pages of the data and b-tree
 * node caches, copies the shadowed pages back and restores the saved bmap.
 */
void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
{
	struct nilfs_mdt_info *info = NILFS_MDT(inode);
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct nilfs_shadow_map *shadow = info->mi_shadow;
	struct address_space *btnc = &ii->i_btnode_cache;

	down_write(&info->mi_sem);

	if (info->mi_palloc_cache)
		nilfs_palloc_clear_cache(inode);

	/* data pages */
	nilfs_clear_dirty_pages(inode->i_mapping, true);
	nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);

	/* b-tree node pages */
	nilfs_clear_dirty_pages(btnc, true);
	nilfs_copy_back_pages(btnc, &shadow->frozen_btnodes);

	nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);

	up_write(&info->mi_sem);
}
/**
 * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches
 * @inode: inode of the metadata file
 *
 * Under the write side of mi_sem: releases all frozen buffers, then
 * truncates both shadow page caches from offset 0.
 */
void nilfs_mdt_clear_shadow_map(struct inode *inode)
{
	struct nilfs_mdt_info *info = NILFS_MDT(inode);
	struct nilfs_shadow_map *shadow = info->mi_shadow;
	struct rw_semaphore *sem = &info->mi_sem;

	down_write(sem);
	nilfs_release_frozen_buffers(shadow);
	truncate_inode_pages(&shadow->frozen_data, 0);
	truncate_inode_pages(&shadow->frozen_btnodes, 0);
	up_write(sem);
}

117
fs/nilfs2/mdt.h Normal file
View file

@ -0,0 +1,117 @@
/*
* mdt.h - NILFS meta data file prototype and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*/
#ifndef _NILFS_MDT_H
#define _NILFS_MDT_H
#include <linux/buffer_head.h>
#include <linux/blockgroup_lock.h>
#include "nilfs.h"
#include "page.h"
/**
 * struct nilfs_shadow_map - shadow mapping of meta data file
 * @bmap_store: shadow copy of bmap state
 * @frozen_data: shadowed dirty data pages
 * @frozen_btnodes: shadowed dirty b-tree nodes' pages
 * @frozen_buffers: list of frozen buffers
 *
 * A shadow map is bound to a metadata file through the mi_shadow member of
 * struct nilfs_mdt_info (see nilfs_mdt_setup_shadow_map()).
 */
struct nilfs_shadow_map {
struct nilfs_bmap_store bmap_store;
struct address_space frozen_data;
struct address_space frozen_btnodes;
struct list_head frozen_buffers;
};
/**
 * struct nilfs_mdt_info - on-memory private data of meta data files
 * @mi_sem: reader/writer semaphore for meta data operations
 * @mi_bgl: per-blockgroup locking
 * @mi_entry_size: size of an entry
 * @mi_first_entry_offset: offset to the first entry
 * @mi_entries_per_block: number of entries in a block
 * @mi_palloc_cache: persistent object allocator cache
 * @mi_shadow: shadow of bmap and page caches
 * @mi_blocks_per_group: number of blocks in a group
 * @mi_blocks_per_desc_block: number of blocks per descriptor block
 *
 * The object is reached through the i_private member of the metadata
 * file's inode; use NILFS_MDT() to obtain it.
 */
struct nilfs_mdt_info {
struct rw_semaphore mi_sem;
struct blockgroup_lock *mi_bgl;
unsigned mi_entry_size;
unsigned mi_first_entry_offset;
unsigned long mi_entries_per_block;
struct nilfs_palloc_cache *mi_palloc_cache;
struct nilfs_shadow_map *mi_shadow;
unsigned long mi_blocks_per_group;
unsigned long mi_blocks_per_desc_block;
};
/* Return the metadata-file private data stored in @inode->i_private. */
static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
{
return inode->i_private;
}
/*
 * Default GFP flags using highmem: the allocation may wait and start I/O,
 * and pages may come from high memory.
 */
#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
/* Block-level accessors of metadata files */
int nilfs_mdt_get_block(struct inode *, unsigned long, int,
void (*init_block)(struct inode *,
struct buffer_head *, void *),
struct buffer_head **);
int nilfs_mdt_delete_block(struct inode *, unsigned long);
int nilfs_mdt_forget_block(struct inode *, unsigned long);
int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
int nilfs_mdt_fetch_dirty(struct inode *);
/* Initialization of the per-inode metadata info */
int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz);
void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
/* Shadow map handling (save / restore / clear of dirty state) */
int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct nilfs_shadow_map *shadow);
int nilfs_mdt_save_to_shadow_map(struct inode *inode);
void nilfs_mdt_restore_from_shadow_map(struct inode *inode);
void nilfs_mdt_clear_shadow_map(struct inode *inode);
int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
struct buffer_head *bh);
static inline void nilfs_mdt_mark_dirty(struct inode *inode)
{
if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}
/* Clear NILFS_I_DIRTY in the state flags of the metadata file inode. */
static inline void nilfs_mdt_clear_dirty(struct inode *inode)
{
clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}
static inline __u64 nilfs_mdt_cno(struct inode *inode)
{
return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
}
/*
 * Address of the lock covering block group @bg of the metadata file @inode.
 * The group number is folded into the lock array with a mask, which relies
 * on NR_BG_LOCKS being a power of two.
 */
#define nilfs_mdt_bgl_lock(inode, bg) \
(&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
#endif /* _NILFS_MDT_H */

585
fs/nilfs2/namei.c Normal file
View file

@ -0,0 +1,585 @@
/*
* namei.c - NILFS pathname lookup operations.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>,
* Ryusuke Konishi <ryusuke@osrg.net>
*/
/*
* linux/fs/ext2/namei.c
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/fs/minix/namei.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
#include <linux/pagemap.h>
#include "nilfs.h"
#include "export.h"
/*
 * File handle sizes in units of 4 bytes (the handle is handed to
 * nilfs_encode_fh() as a __u32 array).  A non-connectable handle ends just
 * before the parent_gen member; a connectable one covers the whole
 * struct nilfs_fid.
 */
#define NILFS_FID_SIZE_NON_CONNECTABLE \
(offsetof(struct nilfs_fid, parent_gen) / 4)
#define NILFS_FID_SIZE_CONNECTABLE (sizeof(struct nilfs_fid) / 4)
/*
 * Link the new, still locked @inode into the directory as @dentry.
 *
 * On success the dentry is instantiated and the inode unlocked.  On failure
 * the link count is dropped, the inode is unlocked and released, and the
 * error from nilfs_add_link() is returned.
 */
static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = nilfs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		unlock_new_inode(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);
	return 0;
}
/*
 * Methods themselves.
 */

/*
 * nilfs_lookup - resolve @dentry's name inside directory @dir
 *
 * Names longer than NILFS_NAME_LEN are rejected with -ENAMETOOLONG.  When
 * the name is not found, a NULL inode is spliced in.
 */
static struct dentry *
nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	struct inode *inode = NULL;
	ino_t ino;

	if (dentry->d_name.len > NILFS_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	ino = nilfs_inode_by_name(dir, &dentry->d_name);
	if (ino)
		inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);

	return d_splice_alias(inode, dentry);
}
/*
 * By the time this is called, we already have created
 * the directory cache entry for the new file, but it
 * is so far negative - it has no inode.
 *
 * If the create succeeds, we fill in the inode information
 * with d_instantiate().
 *
 * The whole operation runs inside a nilfs transaction, which is committed
 * on success and aborted on any failure.
 */
static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
			bool excl)
{
	struct super_block *sb = dir->i_sb;
	struct nilfs_transaction_info ti;
	struct inode *inode;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 1);
	if (err)
		return err;

	inode = nilfs_new_inode(dir, mode);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
	} else {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
		nilfs_mark_inode_dirty(inode);
		err = nilfs_add_nondir(dentry, inode);
	}

	if (err) {
		nilfs_transaction_abort(sb);
		return err;
	}
	return nilfs_transaction_commit(sb);
}
/*
 * nilfs_mknod - create a special file (device node, FIFO, socket)
 *
 * Returns -EINVAL for a device number rejected by new_valid_dev().
 * Otherwise the node is created inside a nilfs transaction that is
 * committed on success and aborted on failure.
 */
static int
nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
{
	struct super_block *sb = dir->i_sb;
	struct nilfs_transaction_info ti;
	struct inode *inode;
	int err;

	if (!new_valid_dev(rdev))
		return -EINVAL;

	err = nilfs_transaction_begin(sb, &ti, 1);
	if (err)
		return err;

	inode = nilfs_new_inode(dir, mode);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
	} else {
		init_special_inode(inode, inode->i_mode, rdev);
		nilfs_mark_inode_dirty(inode);
		err = nilfs_add_nondir(dentry, inode);
	}

	if (err) {
		nilfs_transaction_abort(sb);
		return err;
	}
	return nilfs_transaction_commit(sb);
}
/*
 * nilfs_symlink - create a symbolic link @dentry in @dir pointing to @symname
 *
 * The target (including its terminating NUL) must fit into one block,
 * otherwise -ENAMETOOLONG is returned.  The link body is stored through
 * page_symlink().  The transaction is committed on success and aborted on
 * any failure.
 */
static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = dir->i_sb;
	unsigned l = strlen(symname)+1;
	struct inode *inode;
	int err;

	if (l > sb->s_blocksize)
		return -ENAMETOOLONG;

	err = nilfs_transaction_begin(sb, &ti, 1);
	if (err)
		return err;

	inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto abort;
	}

	/* slow symlink */
	inode->i_op = &nilfs_symlink_inode_operations;
	inode->i_mapping->a_ops = &nilfs_aops;

	err = page_symlink(inode, symname, l);
	if (err) {
		/* undo the new inode before giving up */
		drop_nlink(inode);
		nilfs_mark_inode_dirty(inode);
		unlock_new_inode(inode);
		iput(inode);
		goto abort;
	}

	/* no mark_inode_dirty(inode) here: page_symlink() does this */

	err = nilfs_add_nondir(dentry, inode);
	if (err)
		goto abort;

	return nilfs_transaction_commit(sb);

abort:
	nilfs_transaction_abort(sb);
	return err;
}
/*
 * nilfs_link - create a hard link @dentry in @dir to the inode of @old_dentry
 *
 * The link count and an inode reference are taken before the directory
 * entry is added; both are dropped again if adding the entry fails.
 */
static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct inode *inode = old_dentry->d_inode;
	struct super_block *sb = dir->i_sb;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 1);
	if (err)
		return err;

	inode->i_ctime = CURRENT_TIME;
	inode_inc_link_count(inode);
	ihold(inode);

	err = nilfs_add_link(dentry, inode);
	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		nilfs_transaction_abort(sb);
		return err;
	}

	d_instantiate(dentry, inode);
	return nilfs_transaction_commit(sb);
}
/*
 * nilfs_mkdir - create a new directory @dentry under @dir
 *
 * Runs inside a nilfs transaction: committed when every step succeeds,
 * aborted otherwise.  Returns 0 or a negative error code.
 */
static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct nilfs_transaction_info ti;
int err;
err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
if (err)
return err;
/* parent link count is raised up front and undone at out_dir on failure */
inc_nlink(dir);
inode = nilfs_new_inode(dir, S_IFDIR | mode);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_dir;
inode->i_op = &nilfs_dir_inode_operations;
inode->i_fop = &nilfs_dir_operations;
inode->i_mapping->a_ops = &nilfs_aops;
/*
 * Extra link on the new directory.  out_fail drops two links, i.e. this
 * one plus the one the inode presumably starts with.
 */
inc_nlink(inode);
err = nilfs_make_empty(inode, dir);
if (err)
goto out_fail;
err = nilfs_add_link(dentry, inode);
if (err)
goto out_fail;
nilfs_mark_inode_dirty(inode);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out:
/* common exit: err decides between commit and abort */
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
else
nilfs_transaction_abort(dir->i_sb);
return err;
out_fail:
/* undo the new inode, then fall through to undo the parent link */
drop_nlink(inode);
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
unlock_new_inode(inode);
iput(inode);
out_dir:
drop_nlink(dir);
nilfs_mark_inode_dirty(dir);
goto out;
}
/*
 * nilfs_do_unlink - remove the directory entry of @dentry from @dir
 * @dir: directory containing the entry
 * @dentry: dentry naming the entry to remove
 *
 * Must be called inside a nilfs transaction (the callers in this file
 * begin one first).  On success the ctime of the target inode is set to
 * that of @dir and its link count is dropped by one.
 *
 * Returns 0 on success, -ENOENT if no entry with that name exists, -EIO if
 * the entry found does not refer to @dentry's inode, or the error returned
 * by nilfs_delete_entry().
 */
static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	struct nilfs_dir_entry *de;
	struct page *page;
	int err;

	err = -ENOENT;
	de = nilfs_find_entry(dir, &dentry->d_name, &page);
	if (!de)
		goto out;

	inode = dentry->d_inode;
	err = -EIO;
	if (le64_to_cpu(de->inode) != inode->i_ino) {
		/*
		 * The page returned by nilfs_find_entry() is still mapped
		 * and referenced here; release it the same way nilfs_rename()
		 * does on its error paths, otherwise it is leaked.
		 */
		kunmap(page);
		page_cache_release(page);
		goto out;
	}

	if (!inode->i_nlink) {
		nilfs_warning(inode->i_sb, __func__,
			      "deleting nonexistent file (%lu), %d\n",
			      inode->i_ino, inode->i_nlink);
		/* keep the drop_nlink() below from underflowing the count */
		set_nlink(inode, 1);
	}

	/*
	 * From here on the page is handed over to nilfs_delete_entry();
	 * it is not released by this function (nilfs_rename() relies on the
	 * same hand-over).
	 */
	err = nilfs_delete_entry(de, page);
	if (err)
		goto out;

	inode->i_ctime = dir->i_ctime;
	drop_nlink(inode);
	err = 0;
out:
	return err;
}
/*
 * nilfs_unlink - unlink @dentry from @dir inside a transaction
 *
 * Both the directory and the target inode are marked dirty before the
 * transaction is committed; a failed unlink aborts the transaction.
 */
static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct super_block *sb = dir->i_sb;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (err)
		return err;

	err = nilfs_do_unlink(dir, dentry);
	if (err) {
		nilfs_transaction_abort(sb);
		return err;
	}

	nilfs_mark_inode_dirty(dir);
	nilfs_mark_inode_dirty(dentry->d_inode);
	return nilfs_transaction_commit(sb);
}
/*
 * nilfs_rmdir - remove the empty directory @dentry from @dir
 *
 * Returns -ENOTEMPTY if the directory still has entries.  After the entry
 * is unlinked, the removed directory loses one more link and its size is
 * reset, and the parent loses one link.
 */
static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = dir->i_sb;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (err)
		return err;

	if (!nilfs_empty_dir(inode)) {
		err = -ENOTEMPTY;
		goto abort;
	}

	err = nilfs_do_unlink(dir, dentry);
	if (err)
		goto abort;

	inode->i_size = 0;
	drop_nlink(inode);
	nilfs_mark_inode_dirty(inode);
	drop_nlink(dir);
	nilfs_mark_inode_dirty(dir);

	return nilfs_transaction_commit(sb);

abort:
	nilfs_transaction_abort(sb);
	return err;
}
/*
 * nilfs_rename - move @old_dentry in @old_dir to @new_dentry in @new_dir
 *
 * Runs inside a nilfs transaction.  If the target name already exists, its
 * directory entry is re-pointed at the moved inode and the replaced inode
 * loses its link(s); otherwise a new entry is added.  When a directory is
 * moved, its ".." entry is re-pointed at @new_dir and the parents' link
 * counts are adjusted.
 *
 * Returns the result of the transaction commit on success, or a negative
 * error code after aborting the transaction.
 */
static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *dir_page = NULL;
struct nilfs_dir_entry *dir_de = NULL;
struct page *old_page;
struct nilfs_dir_entry *old_de;
struct nilfs_transaction_info ti;
int err;
err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
if (unlikely(err))
return err;
/* locate the source entry; old_page stays mapped until released below */
err = -ENOENT;
old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_de)
goto out;
/* moving a directory: its ".." entry has to be re-pointed later */
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
dir_de = nilfs_dotdot(old_inode, &dir_page);
if (!dir_de)
goto out_old;
}
if (new_inode) {
/* target exists: overwrite its directory entry */
struct page *new_page;
struct nilfs_dir_entry *new_de;
/* a directory may only replace an empty directory */
err = -ENOTEMPTY;
if (dir_de && !nilfs_empty_dir(new_inode))
goto out_dir;
err = -ENOENT;
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
/* a replaced directory loses one link more than a replaced file */
if (dir_de)
drop_nlink(new_inode);
drop_nlink(new_inode);
nilfs_mark_inode_dirty(new_inode);
} else {
/* target does not exist: add a fresh entry */
err = nilfs_add_link(new_dentry, old_inode);
if (err)
goto out_dir;
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
}
}
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
old_inode->i_ctime = CURRENT_TIME;
/*
 * NOTE(review): the return value of nilfs_delete_entry() is ignored here.
 * old_page is not released explicitly on this path, so the call is
 * presumably expected to release it - confirm against dir.c.
 */
nilfs_delete_entry(old_de, old_page);
if (dir_de) {
/* dir_page is likewise not released here; presumably nilfs_set_link() does */
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
drop_nlink(old_dir);
}
nilfs_mark_inode_dirty(old_dir);
nilfs_mark_inode_dirty(old_inode);
err = nilfs_transaction_commit(old_dir->i_sb);
return err;
/* error unwinding: release mapped pages in reverse order, then abort */
out_dir:
if (dir_de) {
kunmap(dir_page);
page_cache_release(dir_page);
}
out_old:
kunmap(old_page);
page_cache_release(old_page);
out:
nilfs_transaction_abort(old_dir->i_sb);
return err;
}
/*
* Export operations
*/
static struct dentry *nilfs_get_parent(struct dentry *child)
{
unsigned long ino;
struct inode *inode;
struct qstr dotdot = QSTR_INIT("..", 2);
struct nilfs_root *root;
ino = nilfs_inode_by_name(child->d_inode, &dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
root = NILFS_I(child->d_inode)->i_root;
inode = nilfs_iget(child->d_inode->i_sb, root, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
return d_obtain_alias(inode);
}
/*
 * nilfs_get_dentry - obtain a dentry for inode @ino of checkpoint @cno
 *
 * Returns ERR_PTR(-ESTALE) when @ino is below the first regular inode
 * number (and is not the root inode), when no root exists for @cno, or when
 * a non-zero @gen does not match the inode's generation.
 */
static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
				       u64 ino, u32 gen)
{
	struct nilfs_root *root;
	struct inode *inode;

	if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO)
		return ERR_PTR(-ESTALE);

	root = nilfs_lookup_root(sb->s_fs_info, cno);
	if (!root)
		return ERR_PTR(-ESTALE);

	inode = nilfs_iget(sb, root, ino);
	nilfs_put_root(root);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* gen == 0 means "do not check the generation" */
	if (!gen || inode->i_generation == gen)
		return d_obtain_alias(inode);

	iput(inode);
	return ERR_PTR(-ESTALE);
}
static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
int fh_len, int fh_type)
{
struct nilfs_fid *fid = (struct nilfs_fid *)fh;
if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE &&
fh_len != NILFS_FID_SIZE_CONNECTABLE) ||
(fh_type != FILEID_NILFS_WITH_PARENT &&
fh_type != FILEID_NILFS_WITHOUT_PARENT))
return NULL;
return nilfs_get_dentry(sb, fid->cno, fid->ino, fid->gen);
}
/*
 * nilfs_fh_to_parent - decode a file handle into the dentry of the parent
 *
 * Only connectable handles of type FILEID_NILFS_WITH_PARENT carry parent
 * information; anything else yields NULL.
 */
static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh,
					 int fh_len, int fh_type)
{
	struct nilfs_fid *fid = (struct nilfs_fid *)fh;

	if (fh_len == NILFS_FID_SIZE_CONNECTABLE &&
	    fh_type == FILEID_NILFS_WITH_PARENT)
		return nilfs_get_dentry(sb, fid->cno, fid->parent_ino,
					fid->parent_gen);

	return NULL;
}
/*
 * nilfs_encode_fh - encode @inode (and optionally @parent) into a file handle
 * @inode: inode to encode
 * @fh: output buffer, interpreted as a struct nilfs_fid
 * @lenp: in: capacity of @fh in 4-byte units; out: size used or required
 * @parent: parent directory inode, or NULL for a non-connectable handle
 *
 * Returns the file id type written, or FILEID_INVALID (with *@lenp set to
 * the required size) if the buffer is too small.
 */
static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
			   struct inode *parent)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;
	struct nilfs_fid *fid = (struct nilfs_fid *)fh;

	if (parent) {
		if (*lenp < NILFS_FID_SIZE_CONNECTABLE) {
			*lenp = NILFS_FID_SIZE_CONNECTABLE;
			return FILEID_INVALID;
		}
	}
	if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) {
		*lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
		return FILEID_INVALID;
	}

	fid->cno = root->cno;
	fid->ino = inode->i_ino;
	fid->gen = inode->i_generation;

	if (!parent) {
		*lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
		return FILEID_NILFS_WITHOUT_PARENT;
	}

	fid->parent_ino = parent->i_ino;
	fid->parent_gen = parent->i_generation;
	*lenp = NILFS_FID_SIZE_CONNECTABLE;
	return FILEID_NILFS_WITH_PARENT;
}
/* Inode operations for directories; installed by nilfs_mkdir(). */
const struct inode_operations nilfs_dir_inode_operations = {
.create = nilfs_create,
.lookup = nilfs_lookup,
.link = nilfs_link,
.unlink = nilfs_unlink,
.symlink = nilfs_symlink,
.mkdir = nilfs_mkdir,
.rmdir = nilfs_rmdir,
.mknod = nilfs_mknod,
.rename = nilfs_rename,
.setattr = nilfs_setattr,
.permission = nilfs_permission,
.fiemap = nilfs_fiemap,
};
/* Inode operations for special files: attribute changes and permission checks only. */
const struct inode_operations nilfs_special_inode_operations = {
.setattr = nilfs_setattr,
.permission = nilfs_permission,
};
/* Inode operations for symlinks whose target is stored in the page cache; installed by nilfs_symlink(). */
const struct inode_operations nilfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.permission = nilfs_permission,
};
/* File handle export operations (handle encoding/decoding and parent lookup). */
const struct export_operations nilfs_export_ops = {
.encode_fh = nilfs_encode_fh,
.fh_to_dentry = nilfs_fh_to_dentry,
.fh_to_parent = nilfs_fh_to_parent,
.get_parent = nilfs_get_parent,
};

354
fs/nilfs2/nilfs.h Normal file
View file

@ -0,0 +1,354 @@
/*
* nilfs.h - NILFS local header file.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>
* Ryusuke Konishi <ryusuke@osrg.net>
*/
#ifndef _NILFS_H
#define _NILFS_H
#include <linux/kernel.h>
#include <linux/buffer_head.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/nilfs2_fs.h>
#include "the_nilfs.h"
#include "bmap.h"
/**
 * struct nilfs_inode_info - nilfs inode data in memory
 * @i_flags: inode flags
 * @i_state: dynamic state flags (bit numbers are the NILFS_I_* enum values)
 * @i_bmap: pointer on i_bmap_data
 * @i_bmap_data: raw block mapping
 * @i_xattr: <TODO>
 * @i_dir_start_lookup: page index of last successful search
 * @i_cno: checkpoint number for GC inode
 * @i_btnode_cache: cached pages of b-tree nodes
 * @i_dirty: list for connecting dirty files
 * @xattr_sem: semaphore for extended attributes processing
 *	(only present with CONFIG_NILFS_XATTR)
 * @i_bh: buffer contains disk inode
 * @i_root: root object of the current filesystem tree
 * @vfs_inode: VFS inode object
 *
 * The VFS inode is embedded in this structure; NILFS_I() converts a
 * struct inode pointer back to its container.
 */
struct nilfs_inode_info {
__u32 i_flags;
unsigned long i_state; /* Dynamic state flags */
struct nilfs_bmap *i_bmap;
struct nilfs_bmap i_bmap_data;
__u64 i_xattr; /* sector_t ??? */
__u32 i_dir_start_lookup;
__u64 i_cno; /* check point number for GC inode */
struct address_space i_btnode_cache;
struct list_head i_dirty; /* List for connecting dirty files */
#ifdef CONFIG_NILFS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_sem even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
#endif
struct buffer_head *i_bh; /* i_bh contains a new or dirty
disk inode */
struct nilfs_root *i_root;
struct inode vfs_inode;
};
/* Map a VFS inode to the nilfs_inode_info that embeds it as vfs_inode. */
static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode)
{
return container_of(inode, struct nilfs_inode_info, vfs_inode);
}
/*
 * Map a bmap to the nilfs_inode_info that embeds it as i_bmap_data.
 * Only valid for a pointer to that embedded member.
 */
static inline struct nilfs_inode_info *
NILFS_BMAP_I(const struct nilfs_bmap *bmap)
{
return container_of(bmap, struct nilfs_inode_info, i_bmap_data);
}
/*
 * Map a b-tree node cache (the i_btnode_cache member of a
 * nilfs_inode_info) to the VFS inode embedded in the same structure.
 */
static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
{
	return &container_of(btnc, struct nilfs_inode_info,
			     i_btnode_cache)->vfs_inode;
}
/*
 * Dynamic state flags of NILFS on-memory inode (i_state).
 * The values are bit numbers for test_bit()/set_bit()/clear_bit() on
 * nilfs_inode_info.i_state, not masks.
 */
enum {
NILFS_I_NEW = 0, /* Inode is newly created */
NILFS_I_DIRTY, /* The file is dirty */
NILFS_I_QUEUED, /* inode is in dirty_files list */
NILFS_I_BUSY, /* inode is grabbed by a segment
constructor */
NILFS_I_COLLECTED, /* All dirty blocks are collected */
NILFS_I_UPDATED, /* The file has been written back */
NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */
NILFS_I_BMAP, /* has bmap and btnode_cache */
NILFS_I_GCINODE, /* inode for GC, on memory only */
};
/*
 * Commit flags for nilfs_commit_super and nilfs_sync_super
 * (passed as the "flag" argument of nilfs_commit_super()).
 */
enum {
NILFS_SB_COMMIT = 0, /* Commit a super block alternately */
NILFS_SB_COMMIT_ALL /* Commit both super blocks */
};
/*
 * Macros to check inode numbers
 *
 * NILFS_MDT_INO_BITS / NILFS_SYS_INO_BITS are bitmaps with one bit per
 * reserved inode number.  NILFS_FIRST_INO() reads the first non-reserved
 * inode number from the nilfs object attached to the super block.
 *
 * NILFS_MDT_INODE() and NILFS_VALID_INODE() evaluate @ino more than once,
 * so do not pass expressions with side effects.  The bitmaps are 32 bits
 * wide: the bit test is only meaningful for @ino below 32.
 */
#define NILFS_MDT_INO_BITS \
	((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \
			1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \
			1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO))
#define NILFS_SYS_INO_BITS \
	((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
/* @sb is parenthesized so that any pointer expression can be passed */
#define NILFS_FIRST_INO(sb) \
	(((struct the_nilfs *)(sb)->s_fs_info)->ns_first_ino)
/* unsigned shifts: no signed overflow when bit 31 is tested */
#define NILFS_MDT_INODE(sb, ino) \
	((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1U << (ino))))
#define NILFS_VALID_INODE(sb, ino) \
	((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1U << (ino))))
/**
 * struct nilfs_transaction_info - context information for synchronization
 * @ti_magic: Magic number (NILFS_TI_MAGIC for a valid context)
 * @ti_save: Backup of journal_info field of task_struct
 * @ti_flags: Flags (NILFS_TI_* bits)
 * @ti_count: Nest level
 */
struct nilfs_transaction_info {
u32 ti_magic;
void *ti_save;
/* This should never be used.  If it is,
one of the other filesystems has a bug. */
unsigned short ti_flags;
unsigned short ti_count;
};
/* ti_magic: value identifying a nilfs transaction context */
#define NILFS_TI_MAGIC 0xd9e392fb
/* ti_flags: bit masks stored in nilfs_transaction_info.ti_flags */
#define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */
#define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the
end of transaction. */
#define NILFS_TI_GC 0x0004 /* GC context */
#define NILFS_TI_COMMIT 0x0008 /* Change happened or not */
#define NILFS_TI_WRITER 0x0010 /* Constructor context */
/*
 * Transaction API.  Callers in namei.c pair every successful
 * nilfs_transaction_begin() with exactly one commit or abort.
 */
int nilfs_transaction_begin(struct super_block *,
struct nilfs_transaction_info *, int);
int nilfs_transaction_commit(struct super_block *);
void nilfs_transaction_abort(struct super_block *);
/*
 * Set @flag in the transaction context of the current task.
 * current->journal_info is dereferenced without any check, so this must
 * only be called while a nilfs transaction context is installed.
 */
static inline void nilfs_set_transaction_flag(unsigned int flag)
{
	struct nilfs_transaction_info *cur = current->journal_info;

	cur->ti_flags |= flag;
}
/*
 * Test @flag in the transaction context of the current task.
 * Returns 1 if any bit of @flag is set, and 0 if none is set or if
 * current->journal_info is NULL or does not carry the nilfs magic number.
 */
static inline int nilfs_test_transaction_flag(unsigned int flag)
{
	struct nilfs_transaction_info *cur = current->journal_info;

	if (cur != NULL && cur->ti_magic == NILFS_TI_MAGIC)
		return (cur->ti_flags & flag) != 0;

	return 0;
}
/* Non-zero if the current task's nilfs transaction has NILFS_TI_GC set. */
static inline int nilfs_doing_gc(void)
{
return nilfs_test_transaction_flag(NILFS_TI_GC);
}
/* Non-zero if the current task's nilfs transaction has NILFS_TI_WRITER set. */
static inline int nilfs_doing_construction(void)
{
return nilfs_test_transaction_flag(NILFS_TI_WRITER);
}
/*
* function prototype
*/
/*
 * POSIX ACL support is not implemented: enabling CONFIG_NILFS_POSIX_ACL
 * stops the build.  Without it, the ACL hooks are stubs.
 */
#ifdef CONFIG_NILFS_POSIX_ACL
#error "NILFS: not yet supported POSIX ACL"
extern int nilfs_acl_chmod(struct inode *);
extern int nilfs_init_acl(struct inode *, struct inode *);
#else
/* Stub: nothing to update, always succeeds. */
static inline int nilfs_acl_chmod(struct inode *inode)
{
return 0;
}
/* Stub: only applies the current umask to the new inode's mode. */
static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
{
inode->i_mode &= ~current_umask();
return 0;
}
#endif
/* Compile-time switch; its users are outside this header. */
#define NILFS_ATIME_DISABLE
/* Flags that should be inherited by new inodes from their parent. */
#define NILFS_FL_INHERITED \
(FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL | \
FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\
FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL)
/*
 * Mask out flags that are inappropriate for the given type of inode.
 *
 * Directories keep every flag, regular files lose the directory-only flags
 * (FS_DIRSYNC_FL, FS_TOPDIR_FL), and all other inode types keep only
 * FS_NODUMP_FL and FS_NOATIME_FL.
 */
static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
{
	switch (mode & S_IFMT) {
	case S_IFDIR:
		return flags;
	case S_IFREG:
		return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL);
	default:
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
	}
}
/* dir.c: directory entry helpers used by namei.c */
extern int nilfs_add_link(struct dentry *, struct inode *);
extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
extern int nilfs_make_empty(struct inode *, struct inode *);
/* returns NULL when the name is not found (see callers in namei.c) */
extern struct nilfs_dir_entry *
nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
extern int nilfs_empty_dir(struct inode *);
extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
struct page *, struct inode *);
/* file.c */
extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
/* ioctl.c */
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
void **);
/* inode.c */
void nilfs_inode_add_blocks(struct inode *inode, int n);
void nilfs_inode_sub_blocks(struct inode *inode, int n);
/* returns an ERR_PTR() value on failure (callers test it with IS_ERR()) */
extern struct inode *nilfs_new_inode(struct inode *, umode_t);
extern void nilfs_free_inode(struct inode *);
extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void nilfs_set_inode_flags(struct inode *);
extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
extern struct inode *nilfs_iget_for_gc(struct super_block *sb,
unsigned long ino, __u64 cno);
extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
extern int nilfs_setattr(struct dentry *, struct iattr *);
extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
int nilfs_permission(struct inode *inode, int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
extern int __nilfs_mark_inode_dirty(struct inode *, int);
extern void nilfs_dirty_inode(struct inode *, int flags);
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
/* Mark @inode dirty with the I_DIRTY flag set; returns the callee's result. */
static inline int nilfs_mark_inode_dirty(struct inode *inode)
{
return __nilfs_mark_inode_dirty(inode, I_DIRTY);
}
/* Mark @inode dirty with I_DIRTY_SYNC only; returns the callee's result. */
static inline int nilfs_mark_inode_dirty_sync(struct inode *inode)
{
return __nilfs_mark_inode_dirty(inode, I_DIRTY_SYNC);
}
/* super.c */
extern struct inode *nilfs_alloc_inode(struct super_block *);
extern void nilfs_destroy_inode(struct inode *);
/*
 * printf-style reporting helpers: the third argument is the format string.
 * namei.c passes __func__ as the second argument.
 */
extern __printf(3, 4)
void nilfs_error(struct super_block *, const char *, const char *, ...);
extern __printf(3, 4)
void nilfs_warning(struct super_block *, const char *, const char *, ...);
extern struct nilfs_super_block *
nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
extern int nilfs_store_magic_and_option(struct super_block *,
struct nilfs_super_block *, char *);
extern int nilfs_check_feature_compatibility(struct super_block *,
struct nilfs_super_block *);
extern void nilfs_set_log_cursor(struct nilfs_super_block *,
struct the_nilfs *);
struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
int flip);
/* @flag is NILFS_SB_COMMIT or NILFS_SB_COMMIT_ALL */
int nilfs_commit_super(struct super_block *sb, int flag);
int nilfs_cleanup_super(struct super_block *sb);
int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
struct nilfs_root **root);
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
/* gcinode.c */
int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64,
struct buffer_head **);
int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64,
struct buffer_head **);
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
/* sysfs.c */
int __init nilfs_sysfs_init(void);
void nilfs_sysfs_exit(void);
int nilfs_sysfs_create_device_group(struct super_block *);
void nilfs_sysfs_delete_device_group(struct the_nilfs *);
int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
/*
* Inodes and files operations
*
* The directory, special-file and symlink inode operation tables are
* defined in namei.c.
*/
extern const struct file_operations nilfs_dir_operations;
extern const struct inode_operations nilfs_file_inode_operations;
extern const struct file_operations nilfs_file_operations;
extern const struct address_space_operations nilfs_aops;
extern const struct inode_operations nilfs_dir_inode_operations;
extern const struct inode_operations nilfs_special_inode_operations;
extern const struct inode_operations nilfs_symlink_inode_operations;
/*
* filesystem type
*/
extern struct file_system_type nilfs_fs_type;
#endif /* _NILFS_H */

587
fs/nilfs2/page.c Normal file
View file

@ -0,0 +1,587 @@
/*
* page.c - buffer/page management specific to NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>,
* Seiji Kihara <kihara@osrg.net>.
*/
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"
/*
 * Buffer state bits that are carried over to the destination when buffer
 * state is copied: nilfs_copy_buffer() masks the source state with this
 * set, and nilfs_copy_page() uses it as its base mask (optionally adding
 * BH_Dirty).
 */
#define NILFS_BUFFER_INHERENT_BITS \
((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
(1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
/*
 * __nilfs_get_page_block - get the buffer head of a block on a page
 * @page: page that holds the block
 * @block: block offset of the wanted buffer
 * @index: page index of @page
 * @blkbits: log2 of the block size
 * @b_state: initial state given to newly created buffers
 *
 * Creates empty buffers on @page when it has none, picks the buffer that
 * corresponds to @block, touches it and waits on it before returning it.
 * The buffer is returned with the reference taken by
 * nilfs_page_get_nth_block().
 */
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	/* block offset of the first buffer on this page */
	const unsigned long page_first_blk =
		(unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	struct buffer_head *found;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	found = nilfs_page_get_nth_block(page, block - page_first_blk);
	touch_buffer(found);
	wait_on_buffer(found);
	return found;
}
/*
 * nilfs_grab_buffer - grab the buffer head for a block offset in a mapping
 * @inode: inode whose block size (i_blkbits) is used
 * @mapping: page cache to look the page up in
 * @blkoff: block offset of the wanted buffer
 * @b_state: initial state for buffers created on a fresh page
 *
 * Returns the buffer head, or NULL if the page could not be grabbed.
 * On success the page obtained from grab_cache_page() is neither unlocked
 * nor released here.
 */
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	const int bits = inode->i_blkbits;
	const pgoff_t pgidx = blkoff >> (PAGE_CACHE_SHIFT - bits);
	struct buffer_head *bh;
	struct page *pg;

	pg = grab_cache_page(mapping, pgidx);
	if (unlikely(!pg))
		return NULL;

	bh = __nilfs_get_page_block(pg, blkoff, pgidx, bits, b_state);
	if (likely(bh))
		return bh;

	/* no buffer: give the page back before failing */
	unlock_page(pg);
	page_cache_release(pg);
	return NULL;
}
/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 *
 * With the buffer locked, clears the NILFS-specific, async-write, dirty,
 * uptodate and mapped state of @bh, invalidates its block number, and
 * clears the uptodate and mapped-to-disk flags of the page it belongs to.
 * The reference on @bh held by the caller is dropped with brelse().
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
struct page *page = bh->b_page;
lock_buffer(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh);
clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
/* cancel the page's dirty state once none of its buffers is dirty */
if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page);
clear_buffer_uptodate(bh);
clear_buffer_mapped(bh);
bh->b_blocknr = -1;
ClearPageUptodate(page);
ClearPageMappedToDisk(page);
unlock_buffer(bh);
brelse(bh);
}
/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 *
 * Copies @sbh->b_size bytes of data from the source buffer to the
 * destination buffer, then copies the inheritable state bits
 * (NILFS_BUFFER_INHERENT_BITS), the block number and the block device.
 * Finally the uptodate and mapped-to-disk flags of the destination page
 * are set or cleared depending on whether every buffer on that page has
 * the corresponding bit.
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
void *kaddr0, *kaddr1;
unsigned long bits;
struct page *spage = sbh->b_page, *dpage = dbh->b_page;
struct buffer_head *bh;
kaddr0 = kmap_atomic(spage);
kaddr1 = kmap_atomic(dpage);
memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
/* atomic mappings are released in the reverse order of acquisition */
kunmap_atomic(kaddr1);
kunmap_atomic(kaddr0);
dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
dbh->b_blocknr = sbh->b_blocknr;
dbh->b_bdev = sbh->b_bdev;
bh = dbh;
/*
 * Start from the uptodate/mapped bits of the copied buffer and AND in
 * the state of every other buffer on the destination page.
 */
bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
while ((bh = bh->b_this_page) != dbh) {
lock_buffer(bh);
bits &= bh->b_state;
unlock_buffer(bh);
}
/* a bit survives only if all buffers on the page have it */
if (bits & (1UL << BH_Uptodate))
SetPageUptodate(dpage);
else
ClearPageUptodate(dpage);
if (bits & (1UL << BH_Mapped))
SetPageMappedToDisk(dpage);
else
ClearPageMappedToDisk(dpage);
}
/**
 * nilfs_page_buffers_clean - tell whether all buffers of a page are clean
 * @page: page whose buffers are examined
 *
 * Walks the circular buffer list of @page.  Returns 0 as soon as a dirty
 * buffer is found, and a non-zero value when no buffer is dirty.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *const first = page_buffers(page);
	struct buffer_head *cur = first;

	for (;;) {
		if (buffer_dirty(cur))
			return 0;
		cur = cur->b_this_page;
		if (cur == first)
			break;
	}
	return 1;
}
/*
 * nilfs_page_bug - dump the state of a page and its buffers
 * @page: page to report (may be NULL)
 *
 * Prints the reference count, index, flags, mapping and owner inode number
 * of @page at KERN_CRIT level, followed by one line per attached buffer
 * head.  A NULL @page only produces a single marker line.
 */
void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	struct buffer_head *bh, *head;
	unsigned long ino;
	int i;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	ino = 0;
	if (m)
		ino = m->host->i_ino;

	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (!page_has_buffers(page))
		return;

	head = page_buffers(page);
	bh = head;
	i = 0;
	do {
		printk(KERN_CRIT
		       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
		       i, bh, atomic_read(&bh->b_count),
		       (unsigned long long)bh->b_blocknr, bh->b_state);
		i++;
		bh = bh->b_this_page;
	} while (bh != head);
}
/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * of the page itself should be treated by the caller.  The page must not
 * be under i/o.  Both the src and dst pages must be locked.
 *
 * Buffer state (masked by NILFS_BUFFER_INHERENT_BITS, plus BH_Dirty when
 * @copy_dirty is set), block numbers and block devices are copied buffer
 * by buffer, then the page contents and the uptodate / mapped-to-disk page
 * flags are made to match @src.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
BUG_ON(PageWriteback(dst));
sbh = sbufs = page_buffers(src);
/* give dst buffers of the same size as the source buffers */
if (!page_has_buffers(dst))
create_empty_buffers(dst, sbh->b_size, 0);
if (copy_dirty)
mask |= (1UL << BH_Dirty);
dbh = dbufs = page_buffers(dst);
/*
 * First pass: lock every source/destination buffer pair and copy the
 * per-buffer state.  The locks are kept until the page data and page
 * flags have been copied as well.
 */
do {
lock_buffer(sbh);
lock_buffer(dbh);
dbh->b_state = sbh->b_state & mask;
dbh->b_blocknr = sbh->b_blocknr;
dbh->b_bdev = sbh->b_bdev;
sbh = sbh->b_this_page;
dbh = dbh->b_this_page;
} while (dbh != dbufs);
copy_highpage(dst, src);
if (PageUptodate(src) && !PageUptodate(dst))
SetPageUptodate(dst);
else if (!PageUptodate(src) && PageUptodate(dst))
ClearPageUptodate(dst);
if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
SetPageMappedToDisk(dst);
else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
ClearPageMappedToDisk(dst);
/*
 * Second pass: unlock all buffer pairs.  dbh is back at dbufs here;
 * sbh is back at sbufs provided both pages carry the same number of
 * buffers -- NOTE(review): confirm that this always holds.
 */
do {
unlock_buffer(sbh);
unlock_buffer(dbh);
sbh = sbh->b_this_page;
dbh = dbh->b_this_page;
} while (dbh != dbufs);
}
/*
 * nilfs_copy_dirty_pages - copy dirty pages from one page cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Looks up pages tagged dirty in @smap in batches of up to PAGEVEC_SIZE.
 * For each of them the page with the same index is grabbed in @dmap, the
 * contents and buffer state (including dirty bits) are copied, and the
 * destination page is marked dirty.
 *
 * Returns 0 once no more dirty pages are found, or -ENOMEM if a
 * destination page could not be grabbed.
 */
int nilfs_copy_dirty_pages(struct address_space *dmap,
struct address_space *smap)
{
struct pagevec pvec;
unsigned int i;
pgoff_t index = 0;
int err = 0;
pagevec_init(&pvec, 0);
repeat:
/* index is passed by pointer; presumably advanced by the lookup */
if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
PAGEVEC_SIZE))
return 0;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i], *dpage;
lock_page(page);
if (unlikely(!PageDirty(page)))
NILFS_PAGE_BUG(page, "inconsistent dirty state");
dpage = grab_cache_page(dmap, page->index);
if (unlikely(!dpage)) {
/* No empty page is added to the page cache */
err = -ENOMEM;
unlock_page(page);
break;
}
if (unlikely(!page_has_buffers(page)))
NILFS_PAGE_BUG(page,
"found empty page in dat page cache");
/* copy_dirty = 1: buffer dirty bits are copied as well */
nilfs_copy_page(dpage, page, 1);
__set_page_dirty_nobuffers(dpage);
unlock_page(dpage);
page_cache_release(dpage);
unlock_page(page);
}
/* the batch is released on both the success and the error path */
pagevec_release(&pvec);
cond_resched();
if (likely(!err))
goto repeat;
return err;
}
/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * For every page in @smap: if @dmap already has a page at the same index,
 * its contents and buffer state are overwritten (without buffer dirty
 * bits); otherwise the page itself is moved from @smap to @dmap.
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
struct address_space *smap)
{
struct pagevec pvec;
unsigned int i, n;
pgoff_t index = 0;
int err;
pagevec_init(&pvec, 0);
repeat:
n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
if (!n)
return;
/* the next lookup resumes right after the last page of this batch */
index = pvec.pages[n - 1]->index + 1;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i], *dpage;
pgoff_t offset = page->index;
lock_page(page);
dpage = find_lock_page(dmap, offset);
if (dpage) {
/* override existing page on the destination cache */
WARN_ON(PageDirty(dpage));
nilfs_copy_page(dpage, page, 0);
unlock_page(dpage);
page_cache_release(dpage);
} else {
struct page *page2;
/* move the page to the destination cache */
spin_lock_irq(&smap->tree_lock);
page2 = radix_tree_delete(&smap->page_tree, offset);
WARN_ON(page2 != page);
smap->nrpages--;
spin_unlock_irq(&smap->tree_lock);
spin_lock_irq(&dmap->tree_lock);
err = radix_tree_insert(&dmap->page_tree, offset, page);
if (unlikely(err < 0)) {
/* insertion failed: the page ends up in neither cache */
WARN_ON(err == -EEXIST);
page->mapping = NULL;
page_cache_release(page); /* for cache */
} else {
page->mapping = dmap;
dmap->nrpages++;
/* keep the dirty tag in the destination tree in sync */
if (PageDirty(page))
radix_tree_tag_set(&dmap->page_tree,
offset,
PAGECACHE_TAG_DIRTY);
}
spin_unlock_irq(&dmap->tree_lock);
}
unlock_page(page);
}
pagevec_release(&pvec);
cond_resched();
goto repeat;
}
/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space whose dirty pages are discarded
 * @silent: true to suppress, false to print warning messages
 *
 * Repeatedly looks up pages tagged dirty in @mapping and passes each of
 * them, locked, to nilfs_clear_dirty_page().
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
	struct pagevec batch;
	pgoff_t next = 0;
	unsigned int k;

	pagevec_init(&batch, 0);

	for (;;) {
		if (!pagevec_lookup_tag(&batch, mapping, &next,
					PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))
			break;

		for (k = 0; k < pagevec_count(&batch); k++) {
			struct page *pg = batch.pages[k];

			lock_page(pg);
			nilfs_clear_dirty_page(pg, silent);
			unlock_page(pg);
		}
		pagevec_release(&batch);
		cond_resched();
	}
}
/**
 * nilfs_clear_dirty_page - discard dirty page
 * @page: dirty page that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 *
 * The page must be locked by the caller (enforced with BUG_ON).  Clears
 * the uptodate and mapped-to-disk flags of the page, resets the state of
 * every attached buffer, and finally cancels the dirty state of the page.
 */
void nilfs_clear_dirty_page(struct page *page, bool silent)
{
struct inode *inode = page->mapping->host;
struct super_block *sb = inode->i_sb;
BUG_ON(!PageLocked(page));
if (!silent) {
nilfs_warning(sb, __func__,
"discard page: offset %lld, ino %lu",
page_offset(page), inode->i_ino);
}
ClearPageUptodate(page);
ClearPageMappedToDisk(page);
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
do {
/* each buffer is reset under its own buffer lock */
lock_buffer(bh);
if (!silent) {
nilfs_warning(sb, __func__,
"discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size);
}
clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh);
clear_buffer_uptodate(bh);
clear_buffer_mapped(bh);
unlock_buffer(bh);
} while (bh = bh->b_this_page, bh != head);
}
__nilfs_clear_page_dirty(page);
}
/*
 * nilfs_page_count_clean_buffers - count clean buffers in a byte range
 * @page: page whose buffers are examined
 * @from: start of the byte range within the page
 * @to: end of the byte range within the page
 *
 * Returns the number of buffers on @page that overlap the range
 * [@from, @to) and are not dirty.
 */
unsigned nilfs_page_count_clean_buffers(struct page *page,
					unsigned from, unsigned to)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	unsigned start = 0, end;
	unsigned clean = 0;

	do {
		end = start + bh->b_size;
		if (end > from && start < to && !buffer_dirty(bh))
			clean++;
		start = end;
		bh = bh->b_this_page;
	} while (bh != head || !start);

	return clean;
}
/*
 * nilfs_mapping_init - initialize an address space for NILFS use
 * @mapping: address space to set up
 * @inode: inode recorded as the host of @mapping
 * @bdi: backing device info to attach
 *
 * Resets the flags, selects GFP_NOFS as allocation mask, clears the
 * private data and installs empty_aops as address space operations.
 */
void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
			struct backing_dev_info *bdi)
{
	/* the gfp mask is applied after the flags are cleared, as before */
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);

	mapping->a_ops = &empty_aops;
	mapping->backing_dev_info = bdi;
	mapping->private_data = NULL;
	mapping->host = inode;
}
/*
 * __nilfs_clear_page_dirty - cancel the dirty state of a page
 *
 * NILFS2 has two situations in which it must clear a page's dirty state
 * itself:
 *
 * 1) B-tree node pages and data pages of the dat/gcdat lose their dirty
 *    flag when they are copied back from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to the original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) B-tree operations such as insertion or deletion may dispose of
 *    buffers that are still dirty, which requires cancelling the dirty
 *    state of their pages.
 *
 * For a page that belongs to a mapping, the dirty tag in the mapping's
 * radix tree is cleared under tree_lock before the page flag is cleared.
 * Returns the result of clearing the page flag, or 0 if the page was not
 * dirty.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	/* page without a mapping: only the page flag needs handling */
	if (!mapping)
		return TestClearPageDirty(page);

	spin_lock_irq(&mapping->tree_lock);
	if (!test_bit(PG_dirty, &page->flags)) {
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}

	radix_tree_tag_clear(&mapping->page_tree, page_index(page),
			     PAGECACHE_TAG_DIRTY);
	spin_unlock_irq(&mapping->tree_lock);
	return clear_page_dirty_for_io(page);
}
/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent was found, this will store the start offset in
 * @blkoff and return its length in blocks.  Otherwise, zero is
 * returned.
 *
 * @blkoff is written only when a delayed buffer is found.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff)
{
unsigned int i;
pgoff_t index;
unsigned int nblocks_in_page;
unsigned long length = 0;
sector_t b;
struct pagevec pvec;
struct page *page;
/* nothing cached, hence nothing uncommitted */
if (inode->i_mapping->nrpages == 0)
return 0;
index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
pagevec_init(&pvec, 0);
repeat:
pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
pvec.pages);
if (pvec.nr == 0)
return length;
/* an extent in progress ends if the next page is not adjacent */
if (length > 0 && pvec.pages[0]->index > index)
goto out;
/* b tracks the block offset of the buffer currently examined */
b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
i = 0;
do {
page = pvec.pages[i];
lock_page(page);
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
do {
/*
 * Skip buffers before start_blk; "continue" still
 * evaluates the loop condition, which advances b and bh.
 */
if (b < start_blk)
continue;
if (buffer_delay(bh)) {
if (length == 0)
*blkoff = b;
length++;
} else if (length > 0) {
/* first non-delayed buffer terminates the extent */
goto out_locked;
}
} while (++b, bh = bh->b_this_page, bh != head);
} else {
/* a page without buffers terminates an extent in progress */
if (length > 0)
goto out_locked;
b += nblocks_in_page;
}
unlock_page(page);
} while (++i < pagevec_count(&pvec));
/* continue with the page following the last one examined */
index = page->index + 1;
pagevec_release(&pvec);
cond_resched();
goto repeat;
out_locked:
unlock_page(page);
out:
pagevec_release(&pvec);
return length;
}

81
fs/nilfs2/page.h Normal file
View file

@ -0,0 +1,81 @@
/*
* page.h - buffer/page management specific to NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>,
* Seiji Kihara <kihara@osrg.net>.
*/
#ifndef _NILFS_PAGE_H
#define _NILFS_PAGE_H
#include <linux/buffer_head.h>
#include "nilfs.h"
/*
 * Extended buffer state bits
 *
 * NILFS-private bits in buffer_head->b_state, numbered from
 * BH_PrivateStart onwards.
 */
enum {
BH_NILFS_Allocated = BH_PrivateStart,
BH_NILFS_Node, /* nilfs node buffer */
BH_NILFS_Volatile,
BH_NILFS_Checked, /* buffer is verified */
BH_NILFS_Redirected, /* redirected to a copy */
};
/*
 * Accessor helpers for the extended state bits, e.g. the
 * clear_buffer_nilfs_volatile(), clear_buffer_nilfs_checked() and
 * clear_buffer_nilfs_redirected() calls used in page.c.  No accessors
 * are generated here for BH_NILFS_Allocated.
 */
BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
/* Buffer/page helpers implemented in page.c */
int __nilfs_clear_page_dirty(struct page *);
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
unsigned long, unsigned long);
void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
int nilfs_page_buffers_clean(struct page *);
void nilfs_page_bug(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_page(struct page *, bool);
void nilfs_clear_dirty_pages(struct address_space *, bool);
void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
struct backing_dev_info *bdi);
unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff);
/*
 * Dump the state of @page with nilfs_page_bug() and then trigger BUG().
 * The message arguments (m, a...) are accepted but not used by the
 * expansion; they only document the reason at the call site.
 */
#define NILFS_PAGE_BUG(page, m, a...) \
do { nilfs_page_bug(page); BUG(); } while (0)
/*
 * nilfs_page_get_nth_block - get the n-th buffer head of a page
 * @page: page with buffers attached
 * @count: number of steps to take from the first buffer
 *
 * Follows the b_this_page chain @count times starting at the first buffer
 * of @page and returns that buffer with an extra reference (get_bh()).
 */
static inline struct buffer_head *
nilfs_page_get_nth_block(struct page *page, unsigned int count)
{
	struct buffer_head *cur;
	unsigned int hop;

	cur = page_buffers(page);
	for (hop = 0; hop < count; hop++)
		cur = cur->b_this_page;

	get_bh(cur);
	return cur;
}
#endif /* _NILFS_PAGE_H */

964
fs/nilfs2/recovery.c Normal file
View file

@ -0,0 +1,964 @@
/*
* recovery.c - NILFS recovery logic
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*/
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
#include "sufile.h"
#include "page.h"
#include "segbuf.h"
/*
 * Segment check result
 *
 * NILFS_SEG_VALID is zero; the remaining values identify the reason a
 * check failed and are translated into warnings and errno values by
 * nilfs_warn_segment_error().
 */
enum {
NILFS_SEG_VALID,
NILFS_SEG_NO_SUPER_ROOT, /* no super root in the last segment */
NILFS_SEG_FAIL_IO, /* I/O error while loading */
NILFS_SEG_FAIL_MAGIC, /* segment magic number invalid */
NILFS_SEG_FAIL_SEQ, /* sequence number mismatch */
NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, /* checksum error in super root */
NILFS_SEG_FAIL_CHECKSUM_FULL, /* checksum error in segment payload */
NILFS_SEG_FAIL_CONSISTENCY, /* inconsistent segment */
};
/*
 * Work structure for recovery: describes one data block found in a
 * data-sync log (filled in by nilfs_scan_dsync_log()).
 */
struct nilfs_recovery_block {
ino_t ino; /* Inode number of the file that this block
belongs to */
sector_t blocknr; /* block number */
__u64 vblocknr; /* virtual block number */
unsigned long blkoff; /* File offset of the data block (per block) */
struct list_head list; /* link in the list of blocks to be recovered */
};
/*
 * nilfs_warn_segment_error - report a segment check failure
 * @err: one of the NILFS_SEG_* check results
 *
 * Prints a warning that matches @err and converts it into an errno value:
 * -EIO for NILFS_SEG_FAIL_IO, -EINVAL for everything else (including
 * values that have no message of their own).
 */
static int nilfs_warn_segment_error(int err)
{
	/* the I/O failure is the only result mapped to -EIO */
	if (err == NILFS_SEG_FAIL_IO) {
		printk(KERN_WARNING
		       "NILFS warning: I/O error on loading last segment\n");
		return -EIO;
	}

	switch (err) {
	case NILFS_SEG_NO_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: No super root in the last segment\n");
		break;
	case NILFS_SEG_FAIL_MAGIC:
		printk(KERN_WARNING
		       "NILFS warning: Segment magic number invalid\n");
		break;
	case NILFS_SEG_FAIL_SEQ:
		printk(KERN_WARNING
		       "NILFS warning: Sequence number mismatch\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in super root\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_FULL:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment payload\n");
		break;
	case NILFS_SEG_FAIL_CONSISTENCY:
		printk(KERN_WARNING
		       "NILFS warning: Inconsistent segment\n");
		break;
	default:
		break;
	}
	return -EINVAL;
}
/**
 * nilfs_compute_checksum - compute checksum of blocks continuously
 * @nilfs: nilfs object
 * @bhs: buffer head of start block
 * @sum: place to store result
 * @offset: offset bytes in the first block
 * @check_bytes: number of bytes to be checked
 * @start: DBN of start block
 * @nblock: number of blocks to be checked
 *
 * The CRC is seeded with nilfs->ns_crc_seed and covers the first block
 * from @offset onwards, then the following blocks read from the device.
 *
 * Returns 0 and stores the CRC in @sum on success, or -EIO if one of the
 * following blocks cannot be read (@sum is left untouched then).
 */
static int nilfs_compute_checksum(struct the_nilfs *nilfs,
				  struct buffer_head *bhs, u32 *sum,
				  unsigned long offset, u64 check_bytes,
				  sector_t start, unsigned long nblock)
{
	const unsigned int blocksize = nilfs->ns_blocksize;
	unsigned long chunk;
	u32 crc;

	BUG_ON(offset >= blocksize);

	/* first block: skip the leading @offset bytes */
	check_bytes -= offset;
	chunk = min_t(u64, check_bytes, blocksize - offset);
	crc = crc32_le(nilfs->ns_crc_seed,
		       (unsigned char *)bhs->b_data + offset, chunk);

	/* remaining blocks are read one by one from the device */
	while (--nblock > 0) {
		struct buffer_head *bh;

		bh = __bread(nilfs->ns_bdev, ++start, blocksize);
		if (!bh)
			return -EIO;
		check_bytes -= chunk;
		chunk = min_t(u64, check_bytes, blocksize);
		crc = crc32_le(crc, bh->b_data, chunk);
		brelse(bh);
	}

	*sum = crc;
	return 0;
}
/**
 * nilfs_read_super_root_block - read super root block
 * @nilfs: nilfs object
 * @sr_block: disk block number of the super root block
 * @pbh: address of a buffer_head pointer to return super root buffer
 * @check: CRC check flag
 *
 * On success 0 is returned and *@pbh holds the buffer of the super root
 * block.  On failure *@pbh is NULL and the value produced by
 * nilfs_warn_segment_error() (-EIO or -EINVAL) is returned.
 */
int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
				struct buffer_head **pbh, int check)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *sr;
	u32 crc;
	int ret;

	*pbh = NULL;

	bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize);
	if (unlikely(!bh_sr))
		return nilfs_warn_segment_error(NILFS_SEG_FAIL_IO);

	if (check) {
		unsigned bytes;

		sr = (struct nilfs_super_root *)bh_sr->b_data;
		bytes = le16_to_cpu(sr->sr_bytes);

		if (bytes == 0 || bytes > nilfs->ns_blocksize)
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
		else if (nilfs_compute_checksum(nilfs, bh_sr, &crc,
						sizeof(sr->sr_sum), bytes,
						sr_block, 1))
			ret = NILFS_SEG_FAIL_IO;
		else if (crc != le32_to_cpu(sr->sr_sum))
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
		else
			ret = 0;

		if (ret) {
			brelse(bh_sr);
			return nilfs_warn_segment_error(ret);
		}
	}

	*pbh = bh_sr;
	return 0;
}
/**
 * nilfs_read_log_header - read summary header of the specified log
 * @nilfs: nilfs object
 * @start_blocknr: start block number of the log
 * @sum: pointer to return segment summary structure
 *
 * Returns the buffer head of the first block of the log, with *@sum
 * pointing at its data, or NULL if the block could not be read (*@sum is
 * not modified in that case).
 */
static struct buffer_head *
nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
		      struct nilfs_segment_summary **sum)
{
	struct buffer_head *hdr;

	hdr = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
	if (!hdr)
		return NULL;

	*sum = (struct nilfs_segment_summary *)hdr->b_data;
	return hdr;
}
/**
 * nilfs_validate_log - verify consistency of log
 * @nilfs: nilfs object
 * @seg_seq: sequence number of segment
 * @bh_sum: buffer head of summary block
 * @sum: segment summary struct
 *
 * Checks, in this order, the magic number, the sequence number, the block
 * count and the data checksum of the log.  Returns 0 if the log is valid,
 * or the NILFS_SEG_FAIL_* code of the first check that failed.
 */
static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
			      struct buffer_head *bh_sum,
			      struct nilfs_segment_summary *sum)
{
	unsigned long nblock;
	u32 crc;

	if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC)
		return NILFS_SEG_FAIL_MAGIC;

	if (le64_to_cpu(sum->ss_seq) != seg_seq)
		return NILFS_SEG_FAIL_SEQ;

	nblock = le32_to_cpu(sum->ss_nblocks);
	/* This limits the number of blocks read in the CRC check */
	if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment))
		return NILFS_SEG_FAIL_CONSISTENCY;

	if (nilfs_compute_checksum(nilfs, bh_sum, &crc,
				   sizeof(sum->ss_datasum),
				   ((u64)nblock << nilfs->ns_blocksize_bits),
				   bh_sum->b_blocknr, nblock))
		return NILFS_SEG_FAIL_IO;

	if (crc != le32_to_cpu(sum->ss_datasum))
		return NILFS_SEG_FAIL_CHECKSUM_FULL;

	return 0;
}
/**
 * nilfs_read_summary_info - read an item on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be read
 *
 * When the item does not fit in the rest of the current block, the
 * current buffer is released and the next block is read in its place.
 * Returns a pointer to the item and advances @offset past it, or returns
 * NULL (with *@pbh set to NULL) if the next block cannot be read.
 */
static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
				     struct buffer_head **pbh,
				     unsigned int *offset, unsigned int bytes)
{
	struct buffer_head *cur = *pbh;
	void *item;

	BUG_ON(cur->b_size < *offset);

	if (bytes > cur->b_size - *offset) {
		/* item starts at the beginning of the following block */
		sector_t next = cur->b_blocknr + 1;

		brelse(cur);
		cur = __bread(nilfs->ns_bdev, next, nilfs->ns_blocksize);
		*pbh = cur;
		if (unlikely(!cur))
			return NULL;
		*offset = 0;
	}

	item = cur->b_data + *offset;
	*offset += bytes;
	return item;
}
/**
 * nilfs_skip_summary_info - skip items on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be skipped
 * @count: number of items to be skipped
 *
 * If the skipped items end in a later block, the current buffer is
 * released and replaced by the buffer of that block.  *@pbh is NULL
 * afterwards if that block could not be read; the caller has to check.
 */
static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
struct buffer_head **pbh,
unsigned int *offset, unsigned int bytes,
unsigned long count)
{
/* number of whole items that still fit into the current block */
unsigned int rest_item_in_current_block
= ((*pbh)->b_size - *offset) / bytes;
if (count <= rest_item_in_current_block) {
*offset += bytes * count;
} else {
sector_t blocknr = (*pbh)->b_blocknr;
unsigned int nitem_per_block = (*pbh)->b_size / bytes;
unsigned int bcnt;
/* items left after the current block has been used up */
count -= rest_item_in_current_block;
/* number of blocks to move forward */
bcnt = DIV_ROUND_UP(count, nitem_per_block);
/* byte offset behind the skipped items within the target block */
*offset = bytes * (count - (bcnt - 1) * nitem_per_block);
brelse(*pbh);
*pbh = __bread(nilfs->ns_bdev, blocknr + bcnt,
nilfs->ns_blocksize);
}
}
/**
 * nilfs_scan_dsync_log - get block information of a log written for data sync
 * @nilfs: nilfs object
 * @start_blocknr: start block number of the log
 * @sum: log summary information
 * @head: list head to add nilfs_recovery_block struct
 *
 * Walks the finfo/binfo entries in the summary blocks of the log and
 * appends one nilfs_recovery_block to @head for every data block.
 *
 * Returns 0 on success, -EIO if a summary block cannot be read, or
 * -ENOMEM if a recovery block cannot be allocated.  Entries already added
 * to @head are left on the list when an error is returned.
 */
static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
struct nilfs_segment_summary *sum,
struct list_head *head)
{
struct buffer_head *bh;
unsigned int offset;
u32 nfinfo, sumbytes;
sector_t blocknr;
ino_t ino;
int err = -EIO;
nfinfo = le32_to_cpu(sum->ss_nfinfo);
if (!nfinfo)
return 0;
sumbytes = le32_to_cpu(sum->ss_sumbytes);
/* payload blocks follow the summary blocks of the log */
blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize);
bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
if (unlikely(!bh))
goto out;
/* the first finfo starts right behind the summary header */
offset = le16_to_cpu(sum->ss_bytes);
for (;;) {
unsigned long nblocks, ndatablk, nnodeblk;
struct nilfs_finfo *finfo;
finfo = nilfs_read_summary_info(nilfs, &bh, &offset,
sizeof(*finfo));
if (unlikely(!finfo))
goto out;
ino = le64_to_cpu(finfo->fi_ino);
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
nnodeblk = nblocks - ndatablk;
/* one recovery entry per data block of this file */
while (ndatablk-- > 0) {
struct nilfs_recovery_block *rb;
struct nilfs_binfo_v *binfo;
binfo = nilfs_read_summary_info(nilfs, &bh, &offset,
sizeof(*binfo));
if (unlikely(!binfo))
goto out;
rb = kmalloc(sizeof(*rb), GFP_NOFS);
if (unlikely(!rb)) {
err = -ENOMEM;
goto out;
}
rb->ino = ino;
rb->blocknr = blocknr++;
rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
/* INIT_LIST_HEAD(&rb->list); */
list_add_tail(&rb->list, head);
}
if (--nfinfo == 0)
break;
blocknr += nnodeblk; /* always 0 for data sync logs */
nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64),
nnodeblk);
/* bh is NULL if the skip had to read a block and failed */
if (unlikely(!bh))
goto out;
}
err = 0;
out:
brelse(bh); /* brelse(NULL) is just ignored */
return err;
}
static void dispose_recovery_list(struct list_head *head)
{
while (!list_empty(head)) {
struct nilfs_recovery_block *rb;
rb = list_first_entry(head, struct nilfs_recovery_block, list);
list_del(&rb->list);
kfree(rb);
}
}
/* Element of a list of segment numbers (see nilfs_segment_list_add()) */
struct nilfs_segment_entry {
struct list_head list; /* link in the segment list */
__u64 segnum; /* segment number */
};
/*
 * nilfs_segment_list_add - append a segment number to a segment list
 * @head: list to append to
 * @segnum: segment number to record
 *
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
{
	struct nilfs_segment_entry *ent;

	ent = kmalloc(sizeof(*ent), GFP_NOFS);
	if (likely(ent)) {
		ent->segnum = segnum;
		INIT_LIST_HEAD(&ent->list);
		list_add_tail(&ent->list, head);
		return 0;
	}
	return -ENOMEM;
}
void nilfs_dispose_segment_list(struct list_head *head)
{
while (!list_empty(head)) {
struct nilfs_segment_entry *ent;
ent = list_first_entry(head, struct nilfs_segment_entry, list);
list_del(&ent->list);
kfree(ent);
}
}
/*
 * nilfs_prepare_segment_for_recovery - set up segments before writing
 * recovered data
 * @nilfs: nilfs object
 * @sb: super block instance (not referenced in this function)
 * @ri: recovery information
 *
 * Frees the segment that follows the latest super root, scraps the
 * segments written after it, and allocates a fresh segment that becomes
 * both the current and the next segment of @nilfs.
 *
 * Returns 0 on success or the error of the failing sufile/list operation.
 */
static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
struct super_block *sb,
struct nilfs_recovery_info *ri)
{
struct list_head *head = &ri->ri_used_segments;
struct nilfs_segment_entry *ent, *n;
struct inode *sufile = nilfs->ns_sufile;
__u64 segnum[4];
int err;
int i;
segnum[0] = nilfs->ns_segnum;
segnum[1] = nilfs->ns_nextnum;
segnum[2] = ri->ri_segnum;
segnum[3] = ri->ri_nextnum;
/*
* Releasing the next segment of the latest super root.
* The next segment is invalidated by this recovery.
*/
err = nilfs_sufile_free(sufile, segnum[1]);
if (unlikely(err))
goto failed;
/* segnum[0] is not added here; only segnum[1..3] join the list */
for (i = 1; i < 4; i++) {
err = nilfs_segment_list_add(head, segnum[i]);
if (unlikely(err))
goto failed;
}
/*
* Collecting segments written after the latest super root.
* These are marked dirty to avoid being reallocated in the next write.
*/
list_for_each_entry_safe(ent, n, head, list) {
/* entries equal to segnum[0] are dropped without scrapping */
if (ent->segnum != segnum[0]) {
err = nilfs_sufile_scrap(sufile, ent->segnum);
if (unlikely(err))
goto failed;
}
list_del(&ent->list);
kfree(ent);
}
/* Allocate new segments for recovery */
err = nilfs_sufile_alloc(sufile, &segnum[0]);
if (unlikely(err))
goto failed;
nilfs->ns_pseg_offset = 0;
nilfs->ns_seg_seq = ri->ri_seq + 2;
nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
/* the success path falls through with err == 0 */
failed:
/* No need to recover sufile because it will be destroyed on error */
return err;
}
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
struct page *page)
{
struct buffer_head *bh_org;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
if (unlikely(!bh_org))
return -EIO;
kaddr = kmap_atomic(page);
memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
}
/**
 * nilfs_recover_dsync_blocks - write salvaged data-sync blocks back to files
 * @nilfs: nilfs object
 * @sb: super block instance
 * @root: root object used to look up the inodes
 * @head: list of nilfs_recovery_block structures to be recovered
 * @nr_salvaged_blocks: counter incremented for each recovered block
 *
 * For every entry on @head, the owning inode is looked up, the page at the
 * block's file offset is prepared for writing, the block is copied from
 * the log and the file is marked dirty.  A failure on one entry is
 * reported with a warning and processing continues with the next entry.
 * Every entry is unlinked from @head and freed, whether it was recovered
 * or not.
 *
 * Return: 0 if all blocks were recovered, otherwise the first error that
 * was encountered.
 */
static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
				      struct super_block *sb,
				      struct nilfs_root *root,
				      struct list_head *head,
				      unsigned long *nr_salvaged_blocks)
{
	struct inode *inode;
	struct nilfs_recovery_block *rb, *n;
	unsigned blocksize = nilfs->ns_blocksize;
	struct page *page;
	loff_t pos;
	int err = 0, err2 = 0;

	list_for_each_entry_safe(rb, n, head, list) {
		inode = nilfs_iget(sb, root, rb->ino);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			inode = NULL;
			goto failed_inode;
		}

		/*
		 * Widen to loff_t before shifting: blkoff is an unsigned
		 * long, so shifting first would truncate the byte position
		 * on 32-bit systems for offsets of 4 GiB and beyond.
		 */
		pos = (loff_t)rb->blkoff << inode->i_blkbits;
		err = block_write_begin(inode->i_mapping, pos, blocksize,
					0, &page, nilfs_get_block);
		if (unlikely(err)) {
			loff_t isize = inode->i_size;

			if (pos + blocksize > isize)
				nilfs_write_failed(inode->i_mapping,
						   pos + blocksize);
			goto failed_inode;
		}

		err = nilfs_recovery_copy_block(nilfs, rb, page);
		if (unlikely(err))
			goto failed_page;

		err = nilfs_set_file_dirty(inode, 1);
		if (unlikely(err))
			goto failed_page;

		block_write_end(NULL, inode->i_mapping, pos, blocksize,
				blocksize, page, NULL);

		unlock_page(page);
		page_cache_release(page);

		(*nr_salvaged_blocks)++;
		goto next;

 failed_page:
		unlock_page(page);
		page_cache_release(page);

 failed_inode:
		printk(KERN_WARNING
		       "NILFS warning: error recovering data block "
		       "(err=%d, ino=%lu, block-offset=%llu)\n",
		       err, (unsigned long)rb->ino,
		       (unsigned long long)rb->blkoff);
		/* remember only the first error; keep going */
		if (!err2)
			err2 = err;
 next:
		iput(inode); /* iput(NULL) is just ignored */
		list_del_init(&rb->list);
		kfree(rb);
	}
	return err2;
}
/**
* nilfs_do_roll_forward - salvage logical segments newer than the latest
* checkpoint
* @nilfs: nilfs object
* @sb: super block instance
* @root: root object handed to nilfs_recover_dsync_blocks()
* @ri: pointer to a nilfs_recovery_info
*
* Walks the logs starting at @ri->ri_lsegs_start. Data-sync logs
* (NILFS_SS_SYNDT) are collected with nilfs_scan_dsync_log(), and once a
* log flagged NILFS_SS_LOGEND is reached the collected blocks are written
* back with nilfs_recover_dsync_blocks(). If any block was salvaged,
* @ri->ri_need_recovery is set to NILFS_RECOVERY_ROLLFORWARD_DONE.
*
* Return Value: 0 on success. -EIO if a log header cannot be read or log
* validation reports NILFS_SEG_FAIL_IO, -EINVAL if a log with unexpected
* flags is met, or the error returned by nilfs_scan_dsync_log() or
* nilfs_recover_dsync_blocks().
*/
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
struct super_block *sb,
struct nilfs_root *root,
struct nilfs_recovery_info *ri)
{
struct buffer_head *bh_sum = NULL;
struct nilfs_segment_summary *sum;
sector_t pseg_start;
sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
unsigned long nsalvaged_blocks = 0;
unsigned int flags;
u64 seg_seq;
__u64 segnum, nextnum = 0;
int empty_seg = 0;
int err = 0, ret;
LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */
enum {
RF_INIT_ST,
RF_DSYNC_ST, /* scanning data-sync segments */
};
int state = RF_INIT_ST;
pseg_start = ri->ri_lsegs_start;
seg_seq = ri->ri_lsegs_start_seq;
segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
/* Keep scanning until the last partial segment recorded in @ri is passed */
while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
/* Drop the header of the previous iteration (bh_sum is NULL at first) */
brelse(bh_sum);
bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
if (!bh_sum) {
err = -EIO;
goto failed;
}
ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
if (ret) {
if (ret == NILFS_SEG_FAIL_IO) {
err = -EIO;
goto failed;
}
/* Any other validation failure just ends the current trail */
goto strayed;
}
flags = le16_to_cpu(sum->ss_flags);
/* A log carrying a super root is treated as an inconsistency here */
if (flags & NILFS_SS_SR)
goto confused;
/* Found a valid partial segment; do recovery actions */
nextnum = nilfs_get_segnum_of_block(nilfs,
le64_to_cpu(sum->ss_next));
empty_seg = 0;
nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
if (!(flags & NILFS_SS_GC))
nilfs->ns_nongc_ctime = nilfs->ns_ctime;
switch (state) {
case RF_INIT_ST:
/* Wait for a log that both begins a logical segment and is data-sync */
if (!(flags & NILFS_SS_LOGBGN) ||
!(flags & NILFS_SS_SYNDT))
goto try_next_pseg;
state = RF_DSYNC_ST;
/* Fall through */
case RF_DSYNC_ST:
if (!(flags & NILFS_SS_SYNDT))
goto confused;
err = nilfs_scan_dsync_log(nilfs, pseg_start, sum,
&dsync_blocks);
if (unlikely(err))
goto failed;
if (flags & NILFS_SS_LOGEND) {
/* The logical segment is complete; write back what was collected */
err = nilfs_recover_dsync_blocks(
nilfs, sb, root, &dsync_blocks,
&nsalvaged_blocks);
if (unlikely(err))
goto failed;
state = RF_INIT_ST;
}
break; /* Fall through to try_next_pseg */
}
try_next_pseg:
if (pseg_start == ri->ri_lsegs_end)
break;
pseg_start += le32_to_cpu(sum->ss_nblocks);
if (pseg_start < seg_end)
continue;
goto feed_segment;
strayed:
if (pseg_start == ri->ri_lsegs_end)
break;
feed_segment:
/* Looking to the next full segment */
/*
* empty_seg is reset whenever a valid log is found, so this stops the
* scan when no valid log was seen since the previous segment switch.
*/
if (empty_seg++)
break;
seg_seq++;
segnum = nextnum;
nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
pseg_start = seg_start;
}
if (nsalvaged_blocks) {
printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
sb->s_id, nsalvaged_blocks);
ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
}
out:
/* Common exit: release the last header and any unrecovered list entries */
brelse(bh_sum);
dispose_recovery_list(&dsync_blocks);
return err;
confused:
err = -EINVAL;
failed:
/* NOTE(review): this message has no trailing newline - confirm intended */
printk(KERN_ERR
"NILFS (device %s): Error roll-forwarding "
"(err=%d, pseg block=%llu). ",
sb->s_id, err, (unsigned long long)pseg_start);
goto out;
}
/**
 * nilfs_finish_roll_forward - post-cleaning after a roll-forward recovery
 * @nilfs: nilfs object
 * @ri: recovery information filled in by the super root search
 *
 * If the block at @ri->ri_lsegs_start lies in the same full segment as the
 * super root (@ri->ri_super_root), that block is zero-filled and written
 * out synchronously.  Otherwise nothing is done.
 *
 * This step is best effort: the function returns nothing, and a failed
 * write is only reported with a warning.  For the same reason a failure to
 * obtain the buffer is reported with WARN_ON() and skipped rather than
 * bringing the whole kernel down.
 */
static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
				      struct nilfs_recovery_info *ri)
{
	struct buffer_head *bh;
	int err;

	if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
	    nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
		return;

	bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
	if (WARN_ON(!bh))
		return;	/* not expected; skip the optional cleanup */

	memset(bh->b_data, 0, bh->b_size);
	set_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);
	if (unlikely(err))
		printk(KERN_WARNING
		       "NILFS warning: buffer sync write failed during "
		       "post-cleaning of recovery.\n");
	brelse(bh);
}
/**
 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
 * @nilfs: nilfs object
 * @sb: super block instance
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * Attaches the checkpoint @ri->ri_cno, rolls the orphan logs forward and,
 * when blocks were actually salvaged, writes a new segment holding them.
 * Nothing is done if the roll-forward region in @ri is not set.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error code is returned.
 *
 * %-EINVAL - Inconsistent filesystem state.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
			      struct super_block *sb,
			      struct nilfs_recovery_info *ri)
{
	struct nilfs_root *root;
	int err;

	/* Both ends of the roll-forward region must be known */
	if (!ri->ri_lsegs_start || !ri->ri_lsegs_end)
		return 0;

	err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
	if (unlikely(err)) {
		printk(KERN_ERR
		       "NILFS: error loading the latest checkpoint.\n");
		return err;
	}

	err = nilfs_do_roll_forward(nilfs, sb, root, ri);
	if (unlikely(err))
		goto put_root;

	/* Nothing was salvaged, so there is nothing to write back */
	if (ri->ri_need_recovery != NILFS_RECOVERY_ROLLFORWARD_DONE)
		goto put_root;

	err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: Error preparing segments for "
		       "recovery.\n");
		goto put_root;
	}

	err = nilfs_attach_log_writer(sb, root);
	if (unlikely(err))
		goto put_root;

	set_nilfs_discontinued(nilfs);
	err = nilfs_construct_segment(sb);
	/* The log writer is detached whether or not construction succeeded */
	nilfs_detach_log_writer(sb);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: Oops! recovery failed. "
		       "(err=%d)\n", err);
		goto put_root;
	}

	nilfs_finish_roll_forward(nilfs, ri);

put_root:
	/* Shared by the success and the failure paths */
	nilfs_put_root(root);
	return err;
}
/**
* nilfs_search_super_root - search the latest valid super root
* @nilfs: the_nilfs
* @ri: pointer to a nilfs_recovery_info struct to store search results.
*
* nilfs_search_super_root() looks for the latest super-root from a partial
* segment pointed by the superblock. It sets up struct the_nilfs through
* this search. It fills nilfs_recovery_info (ri) required for recovery.
*
* The scan starts at nilfs->ns_last_pseg. Unless the mount state has
* NILFS_VALID_FS set, it keeps scanning past the first super root for
* newer logs, recording the roll-forward region (ri_lsegs_start and
* ri_lsegs_end) and the segments it passes through.
*
* Return Value: On success, 0 is returned. On error, one of the following
* negative error code is returned.
*
* %-EINVAL - No valid segment found
*
* %-EIO - I/O error
*
* %-ENOMEM - Insufficient memory available.
*/
int nilfs_search_super_root(struct the_nilfs *nilfs,
struct nilfs_recovery_info *ri)
{
struct buffer_head *bh_sum = NULL;
struct nilfs_segment_summary *sum;
sector_t pseg_start, pseg_end, sr_pseg_start = 0;
sector_t seg_start, seg_end; /* range of full segment (block number) */
sector_t b, end;
unsigned long nblocks;
unsigned int flags;
u64 seg_seq;
__u64 segnum, nextnum = 0;
__u64 cno;
LIST_HEAD(segments);
int empty_seg = 0, scan_newer = 0;
int ret;
pseg_start = nilfs->ns_last_pseg;
seg_seq = nilfs->ns_last_seq;
cno = nilfs->ns_last_cno;
segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
/* Calculate range of segment */
nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
/* Read ahead segment */
b = seg_start;
while (b <= seg_end)
__breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize);
for (;;) {
/* Drop the header of the previous iteration (bh_sum is NULL at first) */
brelse(bh_sum);
/* Preset so that a failed header read is reported as an I/O failure */
ret = NILFS_SEG_FAIL_IO;
bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
if (!bh_sum)
goto failed;
ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
if (ret) {
if (ret == NILFS_SEG_FAIL_IO)
goto failed;
goto strayed;
}
nblocks = le32_to_cpu(sum->ss_nblocks);
pseg_end = pseg_start + nblocks - 1;
/* A log must not extend beyond the end of its full segment */
if (unlikely(pseg_end > seg_end)) {
ret = NILFS_SEG_FAIL_CONSISTENCY;
goto strayed;
}
/* A valid partial segment */
ri->ri_pseg_start = pseg_start;
ri->ri_seq = seg_seq;
ri->ri_segnum = segnum;
nextnum = nilfs_get_segnum_of_block(nilfs,
le64_to_cpu(sum->ss_next));
ri->ri_nextnum = nextnum;
empty_seg = 0;
flags = le16_to_cpu(sum->ss_flags);
if (!(flags & NILFS_SS_SR) && !scan_newer) {
/* This will never happen because a superblock
(last_segment) always points to a pseg
having a super root. */
ret = NILFS_SEG_FAIL_CONSISTENCY;
goto failed;
}
/* At the head of a full segment, start reading ahead the next one */
if (pseg_start == seg_start) {
nilfs_get_segment_range(nilfs, nextnum, &b, &end);
while (b <= end)
__breadahead(nilfs->ns_bdev, b++,
nilfs->ns_blocksize);
}
if (!(flags & NILFS_SS_SR)) {
/* Log without a super root: remember the roll-forward region */
if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) {
ri->ri_lsegs_start = pseg_start;
ri->ri_lsegs_start_seq = seg_seq;
}
if (flags & NILFS_SS_LOGEND)
ri->ri_lsegs_end = pseg_start;
goto try_next_pseg;
}
/* A valid super root was found. */
ri->ri_cno = cno++;
ri->ri_super_root = pseg_end;
/* Logs seen before this super root need no roll-forward any more */
ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
nilfs_dispose_segment_list(&segments);
sr_pseg_start = pseg_start;
nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start;
nilfs->ns_seg_seq = seg_seq;
nilfs->ns_segnum = segnum;
nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
nilfs->ns_nextnum = nextnum;
if (scan_newer)
ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
else {
/* Stop at the first super root if NILFS_VALID_FS is set */
if (nilfs->ns_mount_state & NILFS_VALID_FS)
goto super_root_found;
scan_newer = 1;
}
try_next_pseg:
/* Standing on a course, or met an inconsistent state */
pseg_start += nblocks;
if (pseg_start < seg_end)
continue;
goto feed_segment;
strayed:
/* Off the trail */
if (!scan_newer)
/*
* This can happen if a checkpoint was written without
* barriers, or as a result of an I/O failure.
*/
goto failed;
feed_segment:
/* Looking to the next full segment */
if (empty_seg++)
goto super_root_found; /* found a valid super root */
/* Remember the segment being left; the list ends up in ri_used_segments */
ret = nilfs_segment_list_add(&segments, segnum);
if (unlikely(ret))
goto failed;
seg_seq++;
segnum = nextnum;
nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
pseg_start = seg_start;
}
super_root_found:
/* Updating pointers relating to the latest checkpoint */
brelse(bh_sum);
list_splice_tail(&segments, &ri->ri_used_segments);
nilfs->ns_last_pseg = sr_pseg_start;
nilfs->ns_last_seq = nilfs->ns_seg_seq;
nilfs->ns_last_cno = ri->ri_cno;
return 0;
failed:
brelse(bh_sum);
nilfs_dispose_segment_list(&segments);
/* Negative values are already errno codes; others are NILFS_SEG_FAIL_* */
return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
}

536
fs/nilfs2/segbuf.c Normal file
View file

@ -0,0 +1,536 @@
/*
* segbuf.c - NILFS segment buffer
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/crc32.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include "page.h"
#include "segbuf.h"
/**
 * struct nilfs_write_info - progress of submitting one log as bios
 * @nilfs: nilfs object whose block device receives the bios
 * @bio: bio currently being filled, or NULL if none is open
 * @start: index of the first block of the region to be submitted
 * @end: index following the last block added to the region so far
 * @rest_blocks: number of blocks of the log not yet submitted
 * @max_pages: value of bio_get_nr_vecs() for the block device
 * @nr_vecs: vector count requested for the next bio
 * @blocknr: disk block number where the log starts
 */
struct nilfs_write_info {
	struct the_nilfs *nilfs;
	struct bio *bio;
	int start;
	int end;
	int rest_blocks;
	int max_pages;
	int nr_vecs;
	sector_t blocknr;
};
static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
struct the_nilfs *nilfs);
static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
/**
 * nilfs_segbuf_new - allocate and initialize a segment buffer
 * @sb: super block instance the new segment buffer points back to
 *
 * Return Value: pointer to the new segment buffer, or NULL if the
 * allocation from nilfs_segbuf_cachep fails.
 */
struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
{
	struct nilfs_segment_buffer *new_buf =
		kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS);

	if (unlikely(!new_buf))
		return NULL;

	/* Back pointer and empty buffer lists */
	new_buf->sb_super = sb;
	new_buf->sb_super_root = NULL;
	INIT_LIST_HEAD(&new_buf->sb_list);
	INIT_LIST_HEAD(&new_buf->sb_segsum_buffers);
	INIT_LIST_HEAD(&new_buf->sb_payload_buffers);

	/* I/O status: no bio in flight, no error recorded */
	new_buf->sb_nbio = 0;
	atomic_set(&new_buf->sb_err, 0);
	init_completion(&new_buf->sb_bio_event);

	return new_buf;
}
/*
* nilfs_segbuf_free - return a segment buffer to nilfs_segbuf_cachep
*
* Only the structure itself is freed here; the buffer lists are not
* touched by this function.
*/
void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf)
{
kmem_cache_free(nilfs_segbuf_cachep, segbuf);
}
/**
 * nilfs_segbuf_map - map a segment buffer onto a full segment
 * @segbuf: segment buffer to be mapped
 * @segnum: index number of the full segment
 * @offset: block offset of the log from the start of the full segment
 * @nilfs: nilfs object
 *
 * Records the block range of segment @segnum, places the log @offset
 * blocks into it and computes how many blocks remain up to the end of
 * the segment.
 */
void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
		      unsigned long offset, struct the_nilfs *nilfs)
{
	nilfs_get_segment_range(nilfs, segnum,
				&segbuf->sb_fseg_start, &segbuf->sb_fseg_end);
	segbuf->sb_segnum = segnum;
	segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset;

	/* sb_fseg_end is inclusive, hence the extra block */
	segbuf->sb_rest_blocks =
		1 + segbuf->sb_fseg_end - segbuf->sb_pseg_start;
}
/**
* nilfs_segbuf_map_cont - map a new log behind a given log
* @segbuf: new segment buffer
* @prev: segment buffer containing a log to be continued
*/
void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
struct nilfs_segment_buffer *prev)
{
segbuf->sb_segnum = prev->sb_segnum;
segbuf->sb_fseg_start = prev->sb_fseg_start;
segbuf->sb_fseg_end = prev->sb_fseg_end;
segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks;
segbuf->sb_rest_blocks =
segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
}
/*
* nilfs_segbuf_set_next_segnum - record the full segment that follows
*
* Stores @nextnum in the segment buffer and its start block number, as
* given by nilfs_get_segment_start_blocknr(), in the on-memory summary.
*/
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
__u64 nextnum, struct the_nilfs *nilfs)
{
segbuf->sb_nextnum = nextnum;
segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum);
}
int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
{
struct buffer_head *bh;
bh = sb_getblk(segbuf->sb_super,
segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk);
if (unlikely(!bh))
return -ENOMEM;
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
struct buffer_head **bhp)
{
struct buffer_head *bh;
bh = sb_getblk(segbuf->sb_super,
segbuf->sb_pseg_start + segbuf->sb_sum.nblocks);
if (unlikely(!bh))
return -ENOMEM;
nilfs_segbuf_add_payload_buffer(segbuf, bh);
*bhp = bh;
return 0;
}
/**
 * nilfs_segbuf_reset - reset the on-memory summary for a fresh log
 * @segbuf: segment buffer
 * @flags: summary flags of the log
 * @ctime: creation time of the log
 * @cno: checkpoint number of the log
 *
 * Clears the block counters, adds the first summary block and then fills
 * in the remaining summary fields.
 *
 * Return Value: 0 on success, or the error returned by
 * nilfs_segbuf_extend_segsum().
 */
int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
		       time_t ctime, __u64 cno)
{
	struct nilfs_segsum_info *ssi = &segbuf->sb_sum;
	int err;

	/* The counters must be zero before the first summary block is added */
	ssi->nsumblk = 0;
	ssi->nblocks = 0;
	err = nilfs_segbuf_extend_segsum(segbuf);
	if (unlikely(err))
		return err;

	ssi->flags = flags;
	ssi->sumbytes = sizeof(struct nilfs_segment_summary);
	ssi->nfileblk = 0;
	ssi->nfinfo = 0;
	ssi->ctime = ctime;
	ssi->cno = cno;
	return 0;
}
/*
* Setup segment summary
*/
void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
{
struct nilfs_segment_summary *raw_sum;
struct buffer_head *bh_sum;
bh_sum = list_entry(segbuf->sb_segsum_buffers.next,
struct buffer_head, b_assoc_buffers);
raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data;
raw_sum->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC);
raw_sum->ss_bytes = cpu_to_le16(sizeof(*raw_sum));
raw_sum->ss_flags = cpu_to_le16(segbuf->sb_sum.flags);
raw_sum->ss_seq = cpu_to_le64(segbuf->sb_sum.seg_seq);
raw_sum->ss_create = cpu_to_le64(segbuf->sb_sum.ctime);
raw_sum->ss_next = cpu_to_le64(segbuf->sb_sum.next);
raw_sum->ss_nblocks = cpu_to_le32(segbuf->sb_sum.nblocks);
raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo);
raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
raw_sum->ss_pad = 0;
raw_sum->ss_cno = cpu_to_le64(segbuf->sb_sum.cno);
}
/*
 * CRC calculation routines
 */

/*
 * Compute the checksum of the segment summary and store it in ss_sumsum.
 *
 * The checksum covers sb_sum.sumbytes bytes of the summary buffers,
 * skipping the leading ss_datasum and ss_sumsum fields of the first one.
 */
static void
nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, u32 seed)
{
	struct buffer_head *bh =
		list_entry(segbuf->sb_segsum_buffers.next,
			   struct buffer_head, b_assoc_buffers);
	struct nilfs_segment_summary *raw_sum =
		(struct nilfs_segment_summary *)bh->b_data;
	const size_t skip = sizeof(raw_sum->ss_datasum) +
			    sizeof(raw_sum->ss_sumsum);
	unsigned long remaining = segbuf->sb_sum.sumbytes;
	unsigned long chunk;
	u32 crc;

	/* First summary block: leave out the two checksum fields */
	chunk = min_t(unsigned long, remaining, bh->b_size);
	crc = crc32_le(seed, (unsigned char *)raw_sum + skip, chunk - skip);

	/* Following summary blocks, up to the summary byte count */
	list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
				     b_assoc_buffers) {
		remaining -= chunk;
		chunk = min_t(unsigned long, remaining, bh->b_size);
		crc = crc32_le(crc, bh->b_data, chunk);
	}

	raw_sum->ss_sumsum = cpu_to_le32(crc);
}
/*
 * Compute the checksum over the whole log and store it in ss_datasum.
 *
 * The checksum covers every summary buffer in full (except the leading
 * ss_datasum field itself) followed by every payload buffer.
 */
static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
					  u32 seed)
{
	struct buffer_head *bh =
		list_entry(segbuf->sb_segsum_buffers.next,
			   struct buffer_head, b_assoc_buffers);
	struct nilfs_segment_summary *raw_sum =
		(struct nilfs_segment_summary *)bh->b_data;
	const size_t skip = sizeof(raw_sum->ss_datasum);
	u32 crc;

	/* First summary block, starting right after ss_datasum */
	crc = crc32_le(seed, (unsigned char *)raw_sum + skip,
		       bh->b_size - skip);

	/* Remaining summary blocks */
	list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
				     b_assoc_buffers)
		crc = crc32_le(crc, bh->b_data, bh->b_size);

	/* Payload blocks are accessed through a temporary atomic mapping */
	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		void *mapped = kmap_atomic(bh->b_page);

		crc = crc32_le(crc, mapped + bh_offset(bh), bh->b_size);
		kunmap_atomic(mapped);
	}

	raw_sum->ss_datasum = cpu_to_le32(crc);
}
/*
 * Compute the checksum of the super root block and store it in sr_sum.
 *
 * The checksum covers NILFS_SR_BYTES(ns_inode_size) bytes of the block,
 * skipping the leading sr_sum field.  The caller must make sure that
 * segbuf->sb_super_root is set.
 */
static void
nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
				    u32 seed)
{
	struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
	struct nilfs_super_root *raw_sr =
		(struct nilfs_super_root *)segbuf->sb_super_root->b_data;
	const size_t skip = sizeof(raw_sr->sr_sum);
	unsigned srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
	u32 crc;

	crc = crc32_le(seed, (unsigned char *)raw_sr + skip, srsize - skip);
	raw_sr->sr_sum = cpu_to_le32(crc);
}
static void nilfs_release_buffers(struct list_head *list)
{
struct buffer_head *bh, *n;
list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
list_del_init(&bh->b_assoc_buffers);
brelse(bh);
}
}
/*
* Release all summary and payload buffers held by @segbuf and forget the
* super root pointer. The segment buffer itself is not freed.
*/
static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
{
nilfs_release_buffers(&segbuf->sb_segsum_buffers);
nilfs_release_buffers(&segbuf->sb_payload_buffers);
/* The payload list includes the super root, so it was released above */
segbuf->sb_super_root = NULL;
}
/*
* Iterators for segment buffers
*/
/*
* nilfs_clear_logs - release the buffers of every segment buffer on @logs
*
* The segment buffers stay on the list; only their buffer lists are
* emptied.
*/
void nilfs_clear_logs(struct list_head *logs)
{
struct nilfs_segment_buffer *segbuf;
list_for_each_entry(segbuf, logs, sb_list)
nilfs_segbuf_clear(segbuf);
}
/*
* nilfs_truncate_logs - remove and free the segment buffers after @last
* @logs: list of segment buffers
* @last: last segment buffer to keep, or NULL to remove all of them
*
* Each removed segment buffer is unlinked, has its buffers released and
* is then freed.
*/
void nilfs_truncate_logs(struct list_head *logs,
struct nilfs_segment_buffer *last)
{
struct nilfs_segment_buffer *n, *segbuf;
/* With a NULL @last the iteration below starts from the list head */
segbuf = list_prepare_entry(last, logs, sb_list);
/* "continue" starts at the entry following segbuf, so @last is kept */
list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) {
list_del_init(&segbuf->sb_list);
nilfs_segbuf_clear(segbuf);
nilfs_segbuf_free(segbuf);
}
}
/**
 * nilfs_write_logs - submit write requests for a list of logs
 * @logs: list of segment buffers storing the logs
 * @nilfs: nilfs object
 *
 * Submission stops at the first log that fails.
 *
 * Return Value: 0 on success, or the error returned by
 * nilfs_segbuf_write() for the failing log.
 */
int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf;

	list_for_each_entry(segbuf, logs, sb_list) {
		int err = nilfs_segbuf_write(segbuf, nilfs);

		if (err)
			return err;
	}
	return 0;
}
/**
 * nilfs_wait_on_logs - wait for the writes of a list of logs to complete
 * @logs: list of segment buffers storing the logs
 *
 * Every log is waited for even after one of them has failed.
 *
 * Return Value: 0 on success, or the first error returned by
 * nilfs_segbuf_wait().
 */
int nilfs_wait_on_logs(struct list_head *logs)
{
	struct nilfs_segment_buffer *segbuf;
	int first_err = 0;

	list_for_each_entry(segbuf, logs, sb_list) {
		int err = nilfs_segbuf_wait(segbuf);

		/* Keep only the earliest failure */
		if (!first_err)
			first_err = err;
	}
	return first_err;
}
/**
* nilfs_add_checksums_on_logs - add checksums on the logs
* @logs: list of segment buffers storing target logs
* @seed: checksum seed value
*
* For each log, fills in the super root checksum (only if the log has a
* super root), the segment summary checksum and the data checksum.
*/
void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed)
{
struct nilfs_segment_buffer *segbuf;
list_for_each_entry(segbuf, logs, sb_list) {
if (segbuf->sb_super_root)
nilfs_segbuf_fill_in_super_root_crc(segbuf, seed);
nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
/*
* The data checksum is computed last: it runs over the summary and
* payload buffers, which by then hold the checksums stored above.
*/
nilfs_segbuf_fill_in_data_crc(segbuf, seed);
}
}
/*
 * BIO operations
 */

/*
 * Completion handler for the bios submitted by nilfs_segbuf_submit_bio().
 *
 * Counts a failure in sb_err when the bio is not up to date, drops the
 * reference to the bio and signals sb_bio_event once.
 */
static void nilfs_end_bio_write(struct bio *bio, int err)
{
	struct nilfs_segment_buffer *segbuf = bio->bi_private;
	const int io_ok = test_bit(BIO_UPTODATE, &bio->bi_flags);

	/* to be detected by nilfs_segbuf_submit_bio() */
	if (err == -EOPNOTSUPP)
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);

	if (!io_ok)
		atomic_inc(&segbuf->sb_err);

	bio_put(bio);
	complete(&segbuf->sb_bio_event);
}
/*
 * Submit the bio currently held in @wi and advance the write window.
 *
 * On return wi->bio is always NULL.  On success the submitted region
 * [start, end) is accounted for and the next region starts at the old
 * end.
 *
 * Returns 0 on success, -EIO if an earlier bio of this log has already
 * failed, or -EOPNOTSUPP if the bio was flagged BIO_EOPNOTSUPP.
 */
static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
				   struct nilfs_write_info *wi, int mode)
{
	struct bio *bio = wi->bio;

	/* When the device is congested, wait for one bio in flight first */
	if (segbuf->sb_nbio > 0 &&
	    bdi_write_congested(segbuf->sb_super->s_bdi)) {
		wait_for_completion(&segbuf->sb_bio_event);
		segbuf->sb_nbio--;
		if (unlikely(atomic_read(&segbuf->sb_err))) {
			/* Give up; the pending bio is never submitted */
			bio_put(bio);
			wi->bio = NULL;
			return -EIO;
		}
	}

	bio->bi_private = segbuf;
	bio->bi_end_io = nilfs_end_bio_write;

	/*
	 * Hold an extra reference across submit_bio(): the completion
	 * handler drops one, and the flags are examined afterwards.
	 */
	bio_get(bio);
	submit_bio(mode, bio);
	segbuf->sb_nbio++;

	if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
		bio_put(bio);
		wi->bio = NULL;
		return -EOPNOTSUPP;
	}
	bio_put(bio);

	/* Account for the submitted region and open the next one */
	wi->bio = NULL;
	wi->rest_blocks -= wi->end - wi->start;
	wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
	wi->start = wi->end;
	return 0;
}
/**
 * nilfs_alloc_seg_bio - allocate a new bio for writing log
 * @nilfs: nilfs object
 * @start: start block number of the bio
 * @nr_vecs: request size of page vector.
 *
 * If the allocation fails, it is retried with the vector count halved
 * each time until it succeeds or the count reaches zero.
 *
 * Return Value: On success, pointer to the struct bio is returned.
 * On error, NULL is returned.
 */
static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
				       int nr_vecs)
{
	struct bio *bio = bio_alloc(GFP_NOIO, nr_vecs);

	while (!bio && (nr_vecs >>= 1))
		bio = bio_alloc(GFP_NOIO, nr_vecs);

	if (unlikely(!bio))
		return NULL;

	bio->bi_bdev = nilfs->ns_bdev;
	/* Convert the block number into 512-byte sectors */
	bio->bi_iter.bi_sector = start << (nilfs->ns_blocksize_bits - 9);
	return bio;
}
/*
* Initialize @wi for writing the log held in @segbuf.
*
* wi->nilfs must already be set by the caller; it is read here to query
* the block device.
*/
static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
struct nilfs_write_info *wi)
{
wi->bio = NULL;
/* Every block of the log is still to be submitted */
wi->rest_blocks = segbuf->sb_sum.nblocks;
wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end = 0;
wi->blocknr = segbuf->sb_pseg_start;
}
/*
 * Add the block of @bh to the bio being built in @wi.
 *
 * A bio is allocated when none is open.  When the open bio cannot take
 * the block, it is submitted and the block is retried on a fresh bio.
 *
 * Returns 0 once the block has been added, -ENOMEM if a bio cannot be
 * allocated, or the error returned by nilfs_segbuf_submit_bio().
 */
static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
				  struct nilfs_write_info *wi,
				  struct buffer_head *bh, int mode)
{
	int added, err;

	BUG_ON(wi->nr_vecs <= 0);

	for (;;) {
		if (!wi->bio) {
			wi->bio = nilfs_alloc_seg_bio(wi->nilfs,
						      wi->blocknr + wi->end,
						      wi->nr_vecs);
			if (unlikely(!wi->bio))
				return -ENOMEM;
		}

		added = bio_add_page(wi->bio, bh->b_page, bh->b_size,
				     bh_offset(bh));
		if (added == bh->b_size) {
			wi->end++;
			return 0;
		}

		/* bio is FULL: send it off without the current bh */
		err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
		if (unlikely(err))
			return err;
	}
}
/**
* nilfs_segbuf_write - submit write requests of a log
* @segbuf: buffer storing a log to be written
* @nilfs: nilfs object
*
* Return Value: On Success, 0 is returned. On Error, one of the following
* negative error code is returned.
*
* %-EIO - I/O error
*
* %-ENOMEM - Insufficient memory available.
*/
static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
struct the_nilfs *nilfs)
{
struct nilfs_write_info wi;
struct buffer_head *bh;
int res = 0, rw = WRITE;
wi.nilfs = nilfs;
nilfs_segbuf_prepare_write(segbuf, &wi);
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
if (unlikely(res))
goto failed_bio;
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
if (unlikely(res))
goto failed_bio;
}
if (wi.bio) {
/*
* Last BIO is always sent through the following
* submission.
*/
rw |= REQ_SYNC;
res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
}
failed_bio:
return res;
}
/**
 * nilfs_segbuf_wait - wait for completion of requested BIOs
 * @segbuf: segment buffer
 *
 * Waits once on sb_bio_event for every bio still counted in sb_nbio and
 * then checks whether any of them reported a failure.
 *
 * Return Value: On Success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error
 */
static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
{
	if (!segbuf->sb_nbio)
		return 0;

	for (;;) {
		wait_for_completion(&segbuf->sb_bio_event);
		if (--segbuf->sb_nbio <= 0)
			break;
	}

	if (likely(atomic_read(&segbuf->sb_err) <= 0))
		return 0;

	printk(KERN_ERR "NILFS: IO error writing segment\n");
	return -EIO;
}

184
fs/nilfs2/segbuf.h Normal file
View file

@ -0,0 +1,184 @@
/*
* segbuf.h - NILFS Segment buffer prototypes and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#ifndef _NILFS_SEGBUF_H
#define _NILFS_SEGBUF_H
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <linux/completion.h>
/**
* struct nilfs_segsum_info - On-memory segment summary
* @flags: Flags
* @nfinfo: Number of file information structures
* @nblocks: Number of blocks included in the partial segment
* (summary blocks are counted here as well)
* @nsumblk: Number of summary blocks
* @sumbytes: Byte count of segment summary
* @nfileblk: Total number of file blocks
* @seg_seq: Segment sequence number
* @cno: Checkpoint number
* @ctime: Creation time
* @next: Start block number of the next full segment
*
* nilfs_segbuf_fill_in_segsum() converts these values into the on-disk
* segment summary.
*/
struct nilfs_segsum_info {
unsigned int flags;
unsigned long nfinfo;
unsigned long nblocks;
unsigned long nsumblk;
unsigned long sumbytes;
unsigned long nfileblk;
u64 seg_seq;
__u64 cno;
time_t ctime;
sector_t next;
};
/**
* struct nilfs_segment_buffer - Segment buffer
* @sb_super: back pointer to a superblock struct
* @sb_list: List head to chain this structure
* @sb_sum: On-memory segment summary
* @sb_segnum: Index number of the full segment
* @sb_nextnum: Index number of the next full segment
* @sb_fseg_start: Start block number of the full segment
* @sb_fseg_end: End block number of the full segment (inclusive)
* @sb_pseg_start: Disk block number of partial segment
* @sb_rest_blocks: Number of residual blocks in the current segment,
* counted from @sb_pseg_start to @sb_fseg_end
* @sb_segsum_buffers: List of buffers for segment summaries
* @sb_payload_buffers: List of buffers for segment payload
* @sb_super_root: Pointer to buffer storing a super root block (if exists)
* @sb_nbio: Number of flying bio requests
* @sb_err: I/O error status (count of bios that completed with an error)
* @sb_bio_event: Completion event of log writing, signalled once per bio
*/
struct nilfs_segment_buffer {
struct super_block *sb_super;
struct list_head sb_list;
/* Segment information */
struct nilfs_segsum_info sb_sum;
__u64 sb_segnum;
__u64 sb_nextnum;
sector_t sb_fseg_start, sb_fseg_end;
sector_t sb_pseg_start;
unsigned sb_rest_blocks;
/* Buffers */
struct list_head sb_segsum_buffers;
struct list_head sb_payload_buffers; /* including super root */
struct buffer_head *sb_super_root;
/* io status */
int sb_nbio;
atomic_t sb_err;
struct completion sb_bio_event;
};
/* Convert a pointer to an sb_list node into its enclosing segment buffer */
#define NILFS_LIST_SEGBUF(head)  \
	list_entry((head), struct nilfs_segment_buffer, sb_list)

/* Neighbours of a segment buffer on its list */
#define NILFS_NEXT_SEGBUF(segbuf)  NILFS_LIST_SEGBUF((segbuf)->sb_list.next)
#define NILFS_PREV_SEGBUF(segbuf)  NILFS_LIST_SEGBUF((segbuf)->sb_list.prev)

/* Ends of a list of segment buffers; not meaningful for an empty list */
#define NILFS_LAST_SEGBUF(head)    NILFS_LIST_SEGBUF((head)->prev)
#define NILFS_FIRST_SEGBUF(head)   NILFS_LIST_SEGBUF((head)->next)

/* True if @segbuf is the last entry of the list headed by @head */
#define NILFS_SEGBUF_IS_LAST(segbuf, head)  ((segbuf)->sb_list.next == (head))

/* Iterate @s over the list headed by @h, stopping before entry @t */
#define nilfs_for_each_segbuf_before(s, t, h) \
	for ((s) = NILFS_FIRST_SEGBUF(h); (s) != (t); \
	     (s) = NILFS_NEXT_SEGBUF(s))

/* First buffer head chained on @head through b_assoc_buffers */
#define NILFS_SEGBUF_FIRST_BH(head)  \
	(list_entry((head)->next, struct buffer_head, b_assoc_buffers))

/* Buffer head chained after @bh through b_assoc_buffers */
#define NILFS_SEGBUF_NEXT_BH(bh)  \
	(list_entry((bh)->b_assoc_buffers.next, struct buffer_head, \
		    b_assoc_buffers))

/*
 * True if @bh is the last buffer of the list headed by @head.  The @head
 * argument is parenthesized so that any expression, such as a
 * conditional one, can be passed safely.
 */
#define NILFS_SEGBUF_BH_IS_LAST(bh, head)  \
	((bh)->b_assoc_buffers.next == (head))
extern struct kmem_cache *nilfs_segbuf_cachep;
struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
void nilfs_segbuf_free(struct nilfs_segment_buffer *);
void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
struct the_nilfs *);
void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
struct nilfs_segment_buffer *prev);
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
struct the_nilfs *);
int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64);
int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
struct buffer_head **);
void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
/*
 * nilfs_segbuf_simplex - test whether a log both begins and ends a
 * logical segment
 *
 * Returns nonzero only if NILFS_SS_LOGBGN and NILFS_SS_LOGEND are both
 * set in the summary flags of @segbuf.
 */
static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf)
{
	const unsigned int both_ends = NILFS_SS_LOGBGN | NILFS_SS_LOGEND;

	return (segbuf->sb_sum.flags & both_ends) == both_ends;
}
/*
* nilfs_segbuf_empty - test whether a log holds summary blocks only
*
* Returns nonzero if the block count equals the summary block count.
*/
static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf)
{
return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk;
}
/*
* Chain @bh at the tail of the summary buffer list of @segbuf. A summary
* block counts both as a block of the log and as a summary block.
* No buffer reference is taken here.
*/
static inline void
nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
struct buffer_head *bh)
{
list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_segsum_buffers);
segbuf->sb_sum.nblocks++;
segbuf->sb_sum.nsumblk++;
}
/*
* Chain @bh at the tail of the payload buffer list of @segbuf and count
* it as one more block of the log. No buffer reference is taken here.
*/
static inline void
nilfs_segbuf_add_payload_buffer(struct nilfs_segment_buffer *segbuf,
struct buffer_head *bh)
{
list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_payload_buffers);
segbuf->sb_sum.nblocks++;
}
/*
* Add a file block to the payload of @segbuf. Unlike
* nilfs_segbuf_add_payload_buffer(), this takes a reference to @bh and
* also counts the block in nfileblk.
*/
static inline void
nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
struct buffer_head *bh)
{
get_bh(bh);
nilfs_segbuf_add_payload_buffer(segbuf, bh);
segbuf->sb_sum.nfileblk++;
}
void nilfs_clear_logs(struct list_head *logs);
void nilfs_truncate_logs(struct list_head *logs,
struct nilfs_segment_buffer *last);
int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs);
int nilfs_wait_on_logs(struct list_head *logs);
void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed);
/*
* nilfs_destroy_logs - remove and free every segment buffer on @logs
*
* Implemented as a truncation with no segment buffer to keep.
*/
static inline void nilfs_destroy_logs(struct list_head *logs)
{
nilfs_truncate_logs(logs, NULL);
}
#endif /* _NILFS_SEGBUF_H */

2759
fs/nilfs2/segment.c Normal file

File diff suppressed because it is too large Load diff

251
fs/nilfs2/segment.h Normal file
View file

@ -0,0 +1,251 @@
/*
* segment.h - NILFS Segment constructor prototypes and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#ifndef _NILFS_SEGMENT_H
#define _NILFS_SEGMENT_H
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/nilfs2_fs.h>
#include "nilfs.h"
struct nilfs_root;
/**
* struct nilfs_recovery_info - Recovery information
* @ri_need_recovery: Recovery status
* @ri_super_root: Block number of the last super root
* @ri_cno: Number of the last checkpoint
* @ri_lsegs_start: Region for roll-forwarding (start block number)
* @ri_lsegs_end: Region for roll-forwarding (end block number)
* @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
* @ri_used_segments: List of segments to be marked active
* @ri_pseg_start: Block number of the last partial segment
* @ri_seq: Sequence number on the last partial segment
* @ri_segnum: Segment number on the last partial segment
* @ri_nextnum: Next segment number on the last partial segment
*/
struct nilfs_recovery_info {
int ri_need_recovery;
sector_t ri_super_root;
__u64 ri_cno;
sector_t ri_lsegs_start;
sector_t ri_lsegs_end;
u64 ri_lsegs_start_seq;
struct list_head ri_used_segments;
sector_t ri_pseg_start;
u64 ri_seq;
__u64 ri_segnum;
__u64 ri_nextnum;
};
/* ri_need_recovery */
#define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */
#define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */
/**
* struct nilfs_cstage - Context of collection stage
* @scnt: Stage count
* @flags: State flags
* @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
* @gc_inode_ptr: Pointer on the list of gc-inodes
*
* NOTE(review): the values stored in @scnt and @flags are not defined in
* this part of the header - confirm their meaning against segment.c.
*/
struct nilfs_cstage {
int scnt;
unsigned flags;
struct nilfs_inode_info *dirty_file_ptr;
struct nilfs_inode_info *gc_inode_ptr;
};
struct nilfs_segment_buffer;
/**
* struct nilfs_segsum_pointer - position described by a buffer and an offset
* @bh: buffer head of the block
* @offset: offset in bytes
*
* NOTE(review): presumably a position inside the segment summary buffers,
* relative to the start of @bh - confirm against the users in segment.c.
*/
struct nilfs_segsum_pointer {
struct buffer_head *bh;
unsigned offset; /* offset in bytes */
};
/**
* struct nilfs_sc_info - Segment constructor information
* @sc_super: Back pointer to super_block struct
* @sc_root: root object of the current filesystem tree
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
* @sc_iput_queue: list of inodes for which iput should be done
* @sc_iput_work: work struct to defer iput call
* @sc_freesegs: array of segment numbers to be freed
* @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
* @sc_dsync_start: start byte offset of data pages
* @sc_dsync_end: end byte offset of data pages (inclusive)
* @sc_segbufs: List of segment buffers
* @sc_write_logs: List of segment buffers to hold logs under writing
* @sc_segbuf_nblocks: Number of available blocks in segment buffers.
* @sc_curseg: Current segment buffer
* @sc_stage: Collection stage
* @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
* @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
* @sc_blk_cnt: Block count of a file
* @sc_datablk_cnt: Data block count of a file
* @sc_nblk_this_inc: Number of blocks included in the current logical segment
* @sc_seg_ctime: Creation time
* @sc_cno: checkpoint number of current log
* @sc_flags: Internal flags
* @sc_state_lock: spinlock for sc_state and so on
* @sc_state: Segctord state flags
* @sc_flush_request: inode bitmap of metadata files to be flushed
* @sc_wait_request: Client request queue
* @sc_wait_daemon: Daemon wait queue
* @sc_wait_task: Start/end wait queue to control segctord task
* @sc_seq_request: Request counter
* @sc_seq_accepted: Accepted request count
* @sc_seq_done: Completion counter
* @sc_sync: Request of explicit sync operation
* @sc_interval: Timeout value of background construction
* @sc_mjcp_freq: Frequency of creating checkpoints
* @sc_lseg_stime: Start time of the latest logical segment
* @sc_watermark: Watermark for the number of dirty buffers
* @sc_timer: Timer for segctord
* @sc_task: current thread of segctord
*/
struct nilfs_sc_info {
struct super_block *sc_super;
struct nilfs_root *sc_root;
unsigned long sc_nblk_inc;
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
struct list_head sc_iput_queue;
struct work_struct sc_iput_work;
__u64 *sc_freesegs;
size_t sc_nfreesegs;
struct nilfs_inode_info *sc_dsync_inode;
loff_t sc_dsync_start;
loff_t sc_dsync_end;
/* Segment buffers */
struct list_head sc_segbufs;
struct list_head sc_write_logs;
unsigned long sc_segbuf_nblocks;
struct nilfs_segment_buffer *sc_curseg;
struct nilfs_cstage sc_stage;
struct nilfs_segsum_pointer sc_finfo_ptr;
struct nilfs_segsum_pointer sc_binfo_ptr;
unsigned long sc_blk_cnt;
unsigned long sc_datablk_cnt;
unsigned long sc_nblk_this_inc;
time_t sc_seg_ctime;
__u64 sc_cno;
unsigned long sc_flags;
spinlock_t sc_state_lock;
unsigned long sc_state;
unsigned long sc_flush_request;
wait_queue_head_t sc_wait_request;
wait_queue_head_t sc_wait_daemon;
wait_queue_head_t sc_wait_task;
__u32 sc_seq_request;
__u32 sc_seq_accepted;
__u32 sc_seq_done;
int sc_sync;
unsigned long sc_interval;
unsigned long sc_mjcp_freq;
unsigned long sc_lseg_stime; /* in 1/HZ seconds */
unsigned long sc_watermark;
struct timer_list sc_timer;
struct task_struct *sc_task;
};
/*
* sc_flags
*
* The enumerators below are numbered consecutively from 0.
* NOTE(review): presumably they are used as bit positions within the
* sc_flags member of struct nilfs_sc_info - confirm against segment.c.
*/
enum {
NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */
NILFS_SC_UNCLOSED, /* Logical segment is not closed */
NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */
NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a
checkpoint */
NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files
other than DAT, cpfile, sufile, or files
moved by GC */
};
/* sc_state: flag values for the sc_state member of struct nilfs_sc_info */
#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */
#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */
/*
* Constant parameters
*/
#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when
destroying segctord */
/*
* Default values of timeout, in seconds.
*/
#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks.
It triggers construction of a
logical segment with a super root */
#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root
creation */
/*
* The default threshold amount of data, in block counts.
*/
#define NILFS_SC_DEFAULT_WATERMARK 3600
/* super.c: kmem_cache pointer declared here for users of this header */
extern struct kmem_cache *nilfs_transaction_cachep;
/* segment.c: entry points of the segment constructor (log writer) */
extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
/*
* NOTE(review): the two loff_t arguments presumably correspond to the
* sc_dsync_start/sc_dsync_end byte range of struct nilfs_sc_info - confirm.
*/
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
loff_t, loff_t);
extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
void nilfs_detach_log_writer(struct super_block *sb);
/* recovery.c: super root search and recovery using struct nilfs_recovery_info */
extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
struct buffer_head **, int);
extern int nilfs_search_super_root(struct the_nilfs *,
struct nilfs_recovery_info *);
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
struct nilfs_recovery_info *ri);
extern void nilfs_dispose_segment_list(struct list_head *);
#endif /* _NILFS_SEGMENT_H */

1222
fs/nilfs2/sufile.c Normal file

File diff suppressed because it is too large Load diff

146
fs/nilfs2/sufile.h Normal file
View file

@ -0,0 +1,146 @@
/*
* sufile.h - NILFS segment usage file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
*/
#ifndef _NILFS_SUFILE_H
#define _NILFS_SUFILE_H
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
#include "mdt.h"
static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
{
return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
}
/* Operations on the segment usage file (sufile) */
unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
int nilfs_sufile_alloc(struct inode *, __u64 *);
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
unsigned long nblocks, time_t modtime);
int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
size_t);
ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned , size_t);
/*
* Generic update helpers: both take a dofunc callback.  The inline
* wrappers further down in this header call them with one of the
* nilfs_sufile_do_*() functions declared below.
*/
int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *,
void (*dofunc)(struct inode *, __u64,
struct buffer_head *,
struct buffer_head *));
int nilfs_sufile_update(struct inode *, __u64, int,
void (*dofunc)(struct inode *, __u64,
struct buffer_head *,
struct buffer_head *));
/* Callbacks whose signature matches the dofunc parameter above */
void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep);
int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range);
/**
 * nilfs_sufile_scrap - make a segment garbage
 * @sufile: inode of segment usage file
 * @segnum: number of the segment to be scrapped
 *
 * Return Value: the value returned by nilfs_sufile_update() when it is
 * invoked with nilfs_sufile_do_scrap() as the callback.
 */
static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
{
	int err;

	err = nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap);
	return err;
}
/**
 * nilfs_sufile_free - free segment
 * @sufile: inode of segment usage file
 * @segnum: number of the segment to be freed
 *
 * Return Value: the value returned by nilfs_sufile_update() when it is
 * invoked with nilfs_sufile_do_free() as the callback.
 */
static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
{
	int err;

	err = nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free);
	return err;
}
/**
 * nilfs_sufile_freev - free segments
 * @sufile: inode of segment usage file
 * @segnumv: array of segment numbers
 * @nsegs: size of @segnumv array
 * @ndone: place to store the number of freed segments
 *
 * Return Value: the value returned by nilfs_sufile_updatev() when it is
 * invoked with nilfs_sufile_do_free() as the callback.
 */
static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
				     size_t nsegs, size_t *ndone)
{
	int err;

	err = nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
				   nilfs_sufile_do_free);
	return err;
}
/**
 * nilfs_sufile_cancel_freev - reallocate freeing segments
 * @sufile: inode of segment usage file
 * @segnumv: array of segment numbers
 * @nsegs: size of @segnumv array
 * @ndone: place to store the number of cancelled segments
 *
 * Return Value: On success, 0 is returned. On error, a negative error code
 * is returned.
 */
static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
					    __u64 *segnumv, size_t nsegs,
					    size_t *ndone)
{
	int err;

	err = nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
				   nilfs_sufile_do_cancel_free);
	return err;
}
/**
 * nilfs_sufile_set_error - mark a segment as erroneous
 * @sufile: inode of segment usage file
 * @segnum: segment number
 *
 * Description: nilfs_sufile_set_error() flags the segment given by
 * @segnum as erroneous, so that it will never be used again.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - Invalid segment usage number.
 */
static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
{
	int err;

	err = nilfs_sufile_update(sufile, segnum, 0,
				  nilfs_sufile_do_set_error);
	return err;
}
#endif /* _NILFS_SUFILE_H */

1488
fs/nilfs2/super.c Normal file

File diff suppressed because it is too large Load diff

1137
fs/nilfs2/sysfs.c Normal file

File diff suppressed because it is too large Load diff

176
fs/nilfs2/sysfs.h Normal file
View file

@ -0,0 +1,176 @@
/*
* sysfs.h - sysfs support declarations.
*
* Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
* Copyright (C) 2014 HGST, Inc., a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
*/
#ifndef _NILFS_SYSFS_H
#define _NILFS_SYSFS_H
#include <linux/sysfs.h>
#define NILFS_ROOT_GROUP_NAME "nilfs2"
/*
* struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
* @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
* @sg_superblock_kobj_unregister: completion state
* @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
* @sg_segctor_kobj_unregister: completion state
* @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
* @sg_mounted_snapshots_kobj_unregister: completion state
* @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
* @sg_checkpoints_kobj_unregister: completion state
* @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
* @sg_segments_kobj_unregister: completion state
*
* Every subgroup kobject is paired with a completion object.
* NOTE(review): presumably the completion is signalled when the matching
* kobject is released - confirm against sysfs.c.
*/
struct nilfs_sysfs_dev_subgroups {
/* /sys/fs/<nilfs>/<device>/superblock */
struct kobject sg_superblock_kobj;
struct completion sg_superblock_kobj_unregister;
/* /sys/fs/<nilfs>/<device>/segctor */
struct kobject sg_segctor_kobj;
struct completion sg_segctor_kobj_unregister;
/* /sys/fs/<nilfs>/<device>/mounted_snapshots */
struct kobject sg_mounted_snapshots_kobj;
struct completion sg_mounted_snapshots_kobj_unregister;
/* /sys/fs/<nilfs>/<device>/checkpoints */
struct kobject sg_checkpoints_kobj;
struct completion sg_checkpoints_kobj_unregister;
/* /sys/fs/<nilfs>/<device>/segments */
struct kobject sg_segments_kobj;
struct completion sg_segments_kobj_unregister;
};
/*
* NILFS_COMMON_ATTR_STRUCT(name) defines struct nilfs_<name>_attr: a
* struct attribute followed by show/store callbacks that receive the
* kobject and the attribute themselves.  It is instantiated below for
* "feature".
*/
#define NILFS_COMMON_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
ssize_t (*show)(struct kobject *, struct attribute *, \
char *); \
ssize_t (*store)(struct kobject *, struct attribute *, \
const char *, size_t); \
};
NILFS_COMMON_ATTR_STRUCT(feature);
/*
* NILFS_DEV_ATTR_STRUCT(name) defines struct nilfs_<name>_attr: a
* struct attribute followed by show/store callbacks that receive the
* typed attribute and a struct the_nilfs pointer.  It is instantiated
* below for each per-device group.
*/
#define NILFS_DEV_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
char *); \
ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
const char *, size_t); \
};
NILFS_DEV_ATTR_STRUCT(dev);
NILFS_DEV_ATTR_STRUCT(segments);
NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
NILFS_DEV_ATTR_STRUCT(checkpoints);
NILFS_DEV_ATTR_STRUCT(superblock);
NILFS_DEV_ATTR_STRUCT(segctor);
/*
* NILFS_CP_ATTR_STRUCT(name) defines struct nilfs_<name>_attr: a
* struct attribute followed by show/store callbacks that receive the
* typed attribute and a struct nilfs_root pointer.  It is instantiated
* below for "snapshot".
*/
#define NILFS_CP_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
char *); \
ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
const char *, size_t); \
};
NILFS_CP_ATTR_STRUCT(snapshot);
/*
* NILFS_ATTR() declares a static struct nilfs_<type>_attr object named
* nilfs_<type>_attr_<name>, initialized with __ATTR().
*
* The three variants below fix the mode and the callbacks:
*   NILFS_INFO_ATTR - mode 0444, no show/store callbacks
*   NILFS_RO_ATTR   - mode 0444, show = nilfs_<type>_<name>_show
*   NILFS_RW_ATTR   - mode 0644, show = nilfs_<type>_<name>_show and
*                     store = nilfs_<type>_<name>_store
*/
#define NILFS_ATTR(type, name, mode, show, store) \
static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
__ATTR(name, mode, show, store)
#define NILFS_INFO_ATTR(type, name) \
NILFS_ATTR(type, name, 0444, NULL, NULL)
#define NILFS_RO_ATTR(type, name) \
NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
#define NILFS_RW_ATTR(type, name) \
NILFS_ATTR(type, name, 0644, \
nilfs_##type##_##name##_show, \
nilfs_##type##_##name##_store)
#define NILFS_FEATURE_INFO_ATTR(name) \
NILFS_INFO_ATTR(feature, name)
#define NILFS_FEATURE_RO_ATTR(name) \
NILFS_RO_ATTR(feature, name)
#define NILFS_FEATURE_RW_ATTR(name) \
NILFS_RW_ATTR(feature, name)
#define NILFS_DEV_INFO_ATTR(name) \
NILFS_INFO_ATTR(dev, name)
#define NILFS_DEV_RO_ATTR(name) \
NILFS_RO_ATTR(dev, name)
#define NILFS_DEV_RW_ATTR(name) \
NILFS_RW_ATTR(dev, name)
/*
 * Attribute declaration helpers for the "segments" group.  Both variants
 * must pass the "segments" type token so that they expand to
 * struct nilfs_segments_attr, the type generated by
 * NILFS_DEV_ATTR_STRUCT(segments); no nilfs_segs_info_attr type exists.
 */
#define NILFS_SEGMENTS_RO_ATTR(name) \
	NILFS_RO_ATTR(segments, name)
#define NILFS_SEGMENTS_RW_ATTR(name) \
	NILFS_RW_ATTR(segments, name)
/*
* Attribute declaration helpers for the remaining groups.  Each one
* forwards to NILFS_INFO_ATTR/NILFS_RO_ATTR/NILFS_RW_ATTR with the type
* token of its group (mounted_snapshots, checkpoints, snapshot,
* superblock, segctor).
*/
#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
NILFS_RO_ATTR(mounted_snapshots, name)
#define NILFS_CHECKPOINTS_RO_ATTR(name) \
NILFS_RO_ATTR(checkpoints, name)
#define NILFS_CHECKPOINTS_RW_ATTR(name) \
NILFS_RW_ATTR(checkpoints, name)
#define NILFS_SNAPSHOT_INFO_ATTR(name) \
NILFS_INFO_ATTR(snapshot, name)
#define NILFS_SNAPSHOT_RO_ATTR(name) \
NILFS_RO_ATTR(snapshot, name)
#define NILFS_SNAPSHOT_RW_ATTR(name) \
NILFS_RW_ATTR(snapshot, name)
#define NILFS_SUPERBLOCK_RO_ATTR(name) \
NILFS_RO_ATTR(superblock, name)
#define NILFS_SUPERBLOCK_RW_ATTR(name) \
NILFS_RW_ATTR(superblock, name)
#define NILFS_SEGCTOR_INFO_ATTR(name) \
NILFS_INFO_ATTR(segctor, name)
#define NILFS_SEGCTOR_RO_ATTR(name) \
NILFS_RO_ATTR(segctor, name)
#define NILFS_SEGCTOR_RW_ATTR(name) \
NILFS_RW_ATTR(segctor, name)
/*
* *_ATTR_LIST(name) expands to the address of the struct attribute
* embedded in the nilfs_<type>_attr_<name> object, i.e. the object that
* NILFS_ATTR() declares for the same type and name.
*/
#define NILFS_FEATURE_ATTR_LIST(name) \
(&nilfs_feature_attr_##name.attr)
#define NILFS_DEV_ATTR_LIST(name) \
(&nilfs_dev_attr_##name.attr)
#define NILFS_SEGMENTS_ATTR_LIST(name) \
(&nilfs_segments_attr_##name.attr)
#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
(&nilfs_mounted_snapshots_attr_##name.attr)
#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
(&nilfs_checkpoints_attr_##name.attr)
#define NILFS_SNAPSHOT_ATTR_LIST(name) \
(&nilfs_snapshot_attr_##name.attr)
#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
(&nilfs_superblock_attr_##name.attr)
#define NILFS_SEGCTOR_ATTR_LIST(name) \
(&nilfs_segctor_attr_##name.attr)
#endif /* _NILFS_SYSFS_H */

816
fs/nilfs2/the_nilfs.c Normal file
View file

@ -0,0 +1,816 @@
/*
* the_nilfs.c - the_nilfs shared structure.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/random.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
#include "alloc.h"
#include "cpfile.h"
#include "sufile.h"
#include "dat.h"
#include "segbuf.h"
static int nilfs_valid_sb(struct nilfs_super_block *sbp);
/**
 * nilfs_set_last_segment - record the position of the latest log
 * @nilfs: nilfs object
 * @start_blocknr: value stored in ns_last_pseg
 * @seq: value stored in ns_last_seq
 * @cno: value stored in ns_last_cno
 *
 * The three values are stored under ns_last_segment_lock.  ns_prev_seq
 * follows ns_last_seq unless the super block is not marked dirty and the
 * sequence number did not change; when the sequence number changed on a
 * non-dirty super block, the super block is marked dirty as well.
 */
void nilfs_set_last_segment(struct the_nilfs *nilfs,
			    sector_t start_blocknr, u64 seq, __u64 cno)
{
	int already_dirty;

	spin_lock(&nilfs->ns_last_segment_lock);

	nilfs->ns_last_pseg = start_blocknr;
	nilfs->ns_last_seq = seq;
	nilfs->ns_last_cno = cno;

	already_dirty = nilfs_sb_dirty(nilfs);
	if (already_dirty || nilfs->ns_prev_seq != nilfs->ns_last_seq) {
		if (!already_dirty)
			set_nilfs_sb_dirty(nilfs);
		nilfs->ns_prev_seq = nilfs->ns_last_seq;
	}

	spin_unlock(&nilfs->ns_last_segment_lock);
}
/**
 * alloc_nilfs - allocate a nilfs object
 * @bdev: block device to which the_nilfs is related
 *
 * The object is zero-allocated with GFP_KERNEL; its locks, lists and
 * checkpoint tree root are then initialized.
 *
 * Return Value: On success, pointer to the_nilfs is returned.
 * On error, NULL is returned.
 */
struct the_nilfs *alloc_nilfs(struct block_device *bdev)
{
	struct the_nilfs *ns = kzalloc(sizeof(*ns), GFP_KERNEL);

	if (ns == NULL)
		return NULL;

	/* plain fields */
	ns->ns_bdev = bdev;
	ns->ns_cptree = RB_ROOT;
	ns->ns_sb_update_freq = NILFS_SB_FREQ;
	atomic_set(&ns->ns_ndirtyblks, 0);

	/* semaphores and mutex */
	init_rwsem(&ns->ns_sem);
	init_rwsem(&ns->ns_segctor_sem);
	mutex_init(&ns->ns_snapshot_mount_mutex);

	/* spinlocks */
	spin_lock_init(&ns->ns_inode_lock);
	spin_lock_init(&ns->ns_next_gen_lock);
	spin_lock_init(&ns->ns_last_segment_lock);
	spin_lock_init(&ns->ns_cptree_lock);

	/* list heads */
	INIT_LIST_HEAD(&ns->ns_dirty_files);
	INIT_LIST_HEAD(&ns->ns_gc_inodes);

	return ns;
}
/**
 * destroy_nilfs - destroy nilfs object
 * @nilfs: nilfs object to be released
 *
 * When the object has its init flag set, the sysfs device group is
 * deleted and both super block buffers are released before the object
 * itself is freed.
 */
void destroy_nilfs(struct the_nilfs *nilfs)
{
	might_sleep();

	if (nilfs_init(nilfs)) {
		int i;

		nilfs_sysfs_delete_device_group(nilfs);
		for (i = 0; i < 2; i++)
			brelse(nilfs->ns_sbh[i]);
	}

	kfree(nilfs);
}
/*
* nilfs_load_super_root - read the super root block and set up the DAT,
* cpfile and sufile inodes from the raw inodes stored in it
* @nilfs: nilfs object that receives ns_dat, ns_cpfile, ns_sufile and
* ns_nongc_ctime
* @sb: super block passed on to the metadata file readers
* @sr_block: block number of the super root
*
* Returns 0 on success or the error code of the step that failed.
*/
static int nilfs_load_super_root(struct the_nilfs *nilfs,
struct super_block *sb, sector_t sr_block)
{
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct nilfs_inode *rawi;
unsigned dat_entry_size, segment_usage_size, checkpoint_size;
unsigned inode_size;
int err;
err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1);
if (unlikely(err))
return err;
/* entry sizes are taken from the primary super block under ns_sem */
down_read(&nilfs->ns_sem);
dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size);
checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size);
segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size);
up_read(&nilfs->ns_sem);
inode_size = nilfs->ns_inode_size;
/* each raw inode sits at an offset that depends on the inode size */
rawi = (void *)bh_sr->b_data + NILFS_SR_DAT_OFFSET(inode_size);
err = nilfs_dat_read(sb, dat_entry_size, rawi, &nilfs->ns_dat);
if (err)
goto failed;
rawi = (void *)bh_sr->b_data + NILFS_SR_CPFILE_OFFSET(inode_size);
err = nilfs_cpfile_read(sb, checkpoint_size, rawi, &nilfs->ns_cpfile);
if (err)
goto failed_dat;
rawi = (void *)bh_sr->b_data + NILFS_SR_SUFILE_OFFSET(inode_size);
err = nilfs_sufile_read(sb, segment_usage_size, rawi,
&nilfs->ns_sufile);
if (err)
goto failed_cpfile;
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime);
/* the success path also falls through here (err == 0) to drop bh_sr */
failed:
brelse(bh_sr);
return err;
/* unwind in reverse order of acquisition, then release the buffer */
failed_cpfile:
iput(nilfs->ns_cpfile);
failed_dat:
iput(nilfs->ns_dat);
goto failed;
}
/*
 * nilfs_init_recovery_info - clear a recovery info structure and
 * initialize its list of used segments as an empty list
 */
static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri)
{
	struct list_head *used_segments = &ri->ri_used_segments;

	memset(ri, 0, sizeof(*ri));
	INIT_LIST_HEAD(used_segments);
}
/*
 * nilfs_clear_recovery_info - hand the list of used segments held in a
 * recovery info structure to nilfs_dispose_segment_list()
 */
static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
{
	struct list_head *used_segments = &ri->ri_used_segments;

	nilfs_dispose_segment_list(used_segments);
}
/**
 * nilfs_store_log_cursor - load log cursor from a super block
 * @nilfs: nilfs object
 * @sbp: buffer storing super block to be read
 *
 * nilfs_store_log_cursor() takes the last log position (block number,
 * checkpoint number and sequence number) from the given super block and
 * derives from it the cursor fields of the nilfs object that log
 * scanning and recovery start from.
 *
 * Return Value: 0 on success, or -EINVAL if the segment number computed
 * from the last log position is not below the number of segments.
 */
static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
				  struct nilfs_super_block *sbp)
{
	nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg);
	nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
	nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);

	nilfs->ns_prev_seq = nilfs->ns_last_seq;
	nilfs->ns_seg_seq = nilfs->ns_last_seq;
	nilfs->ns_segnum =
		nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
	nilfs->ns_cno = nilfs->ns_last_cno + 1;

	if (nilfs->ns_segnum < nilfs->ns_nsegments)
		return 0;

	printk(KERN_ERR "NILFS invalid last segment number.\n");
	return -EINVAL;
}
/**
 * load_nilfs - load and recover the nilfs
 * @nilfs: the_nilfs structure to be loaded
 * @sb: super block instance used to recover past segment
 *
 * load_nilfs() searches and load the latest super root,
 * attaches the last segment, and does recovery if needed.
 * The caller must call this exclusively for simultaneous mounts.
 *
 * Return Value: On success, 0 is returned.  On error, a negative error
 * code is returned.  In both cases the s_flags of @sb are restored to the
 * value they had on entry.
 */
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
	struct nilfs_recovery_info ri;
	unsigned int s_flags = sb->s_flags;
	int really_read_only = bdev_read_only(nilfs->ns_bdev);
	int valid_fs = nilfs_valid_fs(nilfs);
	int err;

	if (!valid_fs) {
		printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
		if (s_flags & MS_RDONLY) {
			printk(KERN_INFO "NILFS: INFO: recovery "
			       "required for readonly filesystem.\n");
			printk(KERN_INFO "NILFS: write access will "
			       "be enabled during recovery.\n");
		}
	}

	nilfs_init_recovery_info(&ri);

	err = nilfs_search_super_root(nilfs, &ri);
	if (unlikely(err)) {
		struct nilfs_super_block **sbp = nilfs->ns_sbp;
		int blocksize;

		/* only -EINVAL triggers the fallback to the spare super block */
		if (err != -EINVAL)
			goto scan_error;

		if (!nilfs_valid_sb(sbp[1])) {
			printk(KERN_WARNING
			       "NILFS warning: unable to fall back to spare "
			       "super block\n");
			goto scan_error;
		}
		printk(KERN_INFO
		       "NILFS: try rollback from an earlier position\n");

		/*
		 * restore super block with its spare and reconfigure
		 * relevant states of the nilfs object.
		 */
		memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
		nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed);
		nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);

		/* verify consistency between two super blocks */
		blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
		if (blocksize != nilfs->ns_blocksize) {
			printk(KERN_WARNING
			       "NILFS warning: blocksize differs between "
			       "two super blocks (%d != %d)\n",
			       blocksize, nilfs->ns_blocksize);
			goto scan_error;
		}

		err = nilfs_store_log_cursor(nilfs, sbp[0]);
		if (err)
			goto scan_error;

		/* drop clean flag to allow roll-forward and recovery */
		nilfs->ns_mount_state &= ~NILFS_VALID_FS;
		valid_fs = 0;

		err = nilfs_search_super_root(nilfs, &ri);
		if (err)
			goto scan_error;
	}

	err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: error loading super root.\n");
		goto failed;
	}

	if (valid_fs)
		goto skip_recovery;

	if (s_flags & MS_RDONLY) {
		__u64 features;

		if (nilfs_test_opt(nilfs, NORECOVERY)) {
			printk(KERN_INFO "NILFS: norecovery option specified. "
			       "skipping roll-forward recovery\n");
			goto skip_recovery;
		}
		features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
			~NILFS_FEATURE_COMPAT_RO_SUPP;
		if (features) {
			printk(KERN_ERR "NILFS: couldn't proceed with "
			       "recovery because of unsupported optional "
			       "features (%llx)\n",
			       (unsigned long long)features);
			err = -EROFS;
			goto failed_unload;
		}
		if (really_read_only) {
			printk(KERN_ERR "NILFS: write access "
			       "unavailable, cannot proceed.\n");
			err = -EROFS;
			goto failed_unload;
		}
		/* temporarily lift the read-only flag while recovering */
		sb->s_flags &= ~MS_RDONLY;
	} else if (nilfs_test_opt(nilfs, NORECOVERY)) {
		printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
		       "option was specified for a read/write mount\n");
		err = -EINVAL;
		goto failed_unload;
	}

	err = nilfs_salvage_orphan_logs(nilfs, sb, &ri);
	if (err)
		goto failed_unload;

	down_write(&nilfs->ns_sem);
	nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
	err = nilfs_cleanup_super(sb);
	up_write(&nilfs->ns_sem);

	if (err) {
		printk(KERN_ERR "NILFS: failed to update super block. "
		       "recovery unfinished.\n");
		goto failed_unload;
	}
	printk(KERN_INFO "NILFS: recovery complete.\n");

 skip_recovery:
	nilfs_clear_recovery_info(&ri);
	sb->s_flags = s_flags;
	return 0;

 scan_error:
	printk(KERN_ERR "NILFS: error searching super root.\n");
	goto failed;

 failed_unload:
	/* drop the metadata file inodes set up by nilfs_load_super_root() */
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_dat);

 failed:
	nilfs_clear_recovery_info(&ri);
	sb->s_flags = s_flags;
	return err;
}
/*
 * nilfs_max_size - upper limit of the file size for a given block size
 * @blkbits: log2 of the block size
 *
 * The result is MAX_LFS_FILESIZE (page cache limit), further capped by
 * the bmap size limit 2^(blkbits + NILFS_BMAP_KEY_BIT) - 1 whenever that
 * exponent is smaller than 64.
 */
static unsigned long long nilfs_max_size(unsigned int blkbits)
{
	const unsigned int bmap_bits = blkbits + NILFS_BMAP_KEY_BIT;
	unsigned long long limit = MAX_LFS_FILESIZE;

	if (bmap_bits >= 64)
		return limit;

	return min_t(unsigned long long, limit, (1ULL << bmap_bits) - 1);
}
/**
* nilfs_nrsvsegs - calculate the number of reserved segments
* @nilfs: nilfs object
* @nsegs: total number of segments
*/
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
{
return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
100));
}
/**
 * nilfs_set_nsegments - set the number of segments
 * @nilfs: nilfs object
 * @nsegs: total number of segments
 *
 * Stores @nsegs in ns_nsegments and the matching result of
 * nilfs_nrsvsegs() in ns_nrsvsegs.
 */
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
{
	unsigned long nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);

	nilfs->ns_nsegments = nsegs;
	nilfs->ns_nrsvsegs = nrsvsegs;
}
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
printk(KERN_ERR "NILFS: unsupported revision "
"(superblock rev.=%d.%d, current rev.=%d.%d). "
"Please check the version of mkfs.nilfs.\n",
le32_to_cpu(sbp->s_rev_level),
le16_to_cpu(sbp->s_minor_rev_level),
NILFS_CURRENT_REV, NILFS_MINOR_REV);
return -EINVAL;
}
nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes);
if (nilfs->ns_sbsize > BLOCK_SIZE)
return -EINVAL;
nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
if (nilfs->ns_inode_size > nilfs->ns_blocksize) {
printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n",
nilfs->ns_inode_size);
return -EINVAL;
} else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) {
printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n",
nilfs->ns_inode_size);
return -EINVAL;
}
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
printk(KERN_ERR "NILFS: too short segment.\n");
return -EINVAL;
}
nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
nilfs->ns_r_segments_percentage =
le32_to_cpu(sbp->s_r_segments_percentage);
if (nilfs->ns_r_segments_percentage < 1 ||
nilfs->ns_r_segments_percentage > 99) {
printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
return -EINVAL;
}
nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
/*
 * nilfs_valid_sb - check the magic number and checksum of a super block
 * @sbp: super block to check (may be NULL)
 *
 * The CRC is computed over the first s_bytes bytes of the super block
 * with the s_sum field replaced by four zero bytes, seeded with
 * s_crc_seed.
 *
 * Returns nonzero if the super block is valid, 0 otherwise.
 */
static int nilfs_valid_sb(struct nilfs_super_block *sbp)
{
	static unsigned char sum[4];
	const int sumoff = offsetof(struct nilfs_super_block, s_sum);
	size_t bytes;
	u32 crc;

	if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
		return 0;
	bytes = le16_to_cpu(sbp->s_bytes);
	/*
	 * s_bytes comes from disk: it must cover the checksum field, or the
	 * length "bytes - sumoff - 4" below would wrap around.
	 */
	if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
		return 0;
	crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
		       sumoff);
	crc = crc32_le(crc, sum, 4);
	crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4,
		       bytes - sumoff - 4);
	return crc == le32_to_cpu(sbp->s_sum);
}
/*
 * nilfs_sb2_bad_offset - check the location of the secondary super block
 * @sbp: super block describing the disk layout
 * @offset: byte offset of the secondary super block
 *
 * Returns nonzero if @offset lies below the byte size of the segment area
 * (s_nsegments * s_blocks_per_segment blocks), or if s_log_block_size
 * describes a block size larger than NILFS_MAX_BLOCK_SIZE.
 */
static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
	unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
	u64 nblocks;

	/*
	 * s_log_block_size comes from disk and is not validated yet; reject
	 * values beyond the maximum block size so that the shift count
	 * below can never reach the width of u64.
	 */
	if (shift_bits >= 32 ||
	    (NILFS_MAX_BLOCK_SIZE >> shift_bits) < BLOCK_SIZE)
		return 1;

	nblocks = le64_to_cpu(sbp->s_nsegments) *
		le32_to_cpu(sbp->s_blocks_per_segment);
	return offset < (nblocks << (shift_bits + 10));
}
/*
 * nilfs_release_super_block - release the buffers of both super blocks
 * and reset the buffer head and super block pointers of every slot that
 * currently holds a super block
 */
static void nilfs_release_super_block(struct the_nilfs *nilfs)
{
	int idx;

	for (idx = 0; idx < 2; idx++) {
		if (!nilfs->ns_sbp[idx])
			continue;

		brelse(nilfs->ns_sbh[idx]);
		nilfs->ns_sbh[idx] = NULL;
		nilfs->ns_sbp[idx] = NULL;
	}
}
/*
 * nilfs_fall_back_super_block - release the buffer of the primary super
 * block and move the secondary super block into the primary slot,
 * leaving the secondary slot empty
 */
void nilfs_fall_back_super_block(struct the_nilfs *nilfs)
{
	struct buffer_head **sbh = nilfs->ns_sbh;
	struct nilfs_super_block **sbp = nilfs->ns_sbp;

	brelse(sbh[0]);

	sbh[0] = sbh[1];
	sbp[0] = sbp[1];

	sbh[1] = NULL;
	sbp[1] = NULL;
}
/*
 * nilfs_swap_super_block - exchange the primary and secondary super
 * blocks (both the buffer heads and the super block pointers)
 */
void nilfs_swap_super_block(struct the_nilfs *nilfs)
{
	struct buffer_head **sbh = nilfs->ns_sbh;
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	struct buffer_head *old_sbh0 = sbh[0];
	struct nilfs_super_block *old_sbp0 = sbp[0];

	sbh[0] = sbh[1];
	sbh[1] = old_sbh0;

	sbp[0] = sbp[1];
	sbp[1] = old_sbp0;
}
/*
* nilfs_load_super_block - read both super blocks and pick the one to use
* @nilfs: nilfs object whose ns_sbh/ns_sbp slots are filled in
* @sb: super block instance used for reading
* @blocksize: block size used for reading
* @sbpp: place to store the pointer to the selected super block
*
* Returns 0 on success, -EIO if neither super block could be read, or
* -EINVAL if no valid super block was found.
*/
static int nilfs_load_super_block(struct the_nilfs *nilfs,
struct super_block *sb, int blocksize,
struct nilfs_super_block **sbpp)
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
int valid[2], swp = 0;
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
if (!sbp[0]) {
if (!sbp[1]) {
printk(KERN_ERR "NILFS: unable to read superblock\n");
return -EIO;
}
printk(KERN_WARNING
"NILFS warning: unable to read primary superblock "
"(blocksize = %d)\n", blocksize);
} else if (!sbp[1]) {
printk(KERN_WARNING
"NILFS warning: unable to read secondary superblock "
"(blocksize = %d)\n", blocksize);
}
/*
* Compare two super blocks and set 1 in swp if the secondary
* super block is valid and newer. Otherwise, set 0 in swp.
*/
valid[0] = nilfs_valid_sb(sbp[0]);
valid[1] = nilfs_valid_sb(sbp[1]);
swp = valid[1] && (!valid[0] ||
le64_to_cpu(sbp[1]->s_last_cno) >
le64_to_cpu(sbp[0]->s_last_cno));
/*
* If the layout described by the preferred super block says that the
* secondary super block offset is bad, drop the secondary one and
* continue with the primary only.
*/
if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
brelse(sbh[1]);
sbh[1] = NULL;
sbp[1] = NULL;
valid[1] = 0;
swp = 0;
}
if (!valid[swp]) {
nilfs_release_super_block(nilfs);
printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n",
sb->s_id);
return -EINVAL;
}
/* the super block that was not selected is invalid: warn about it */
if (!valid[!swp])
printk(KERN_WARNING "NILFS warning: broken superblock. "
"using spare superblock (blocksize = %d).\n", blocksize);
if (swp)
nilfs_swap_super_block(nilfs);
nilfs->ns_sbwcount = 0;
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
/*
* The index below is 1 only when the secondary super block is valid
* and no swap took place; in every other case slot 0 is used.
*/
nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
*sbpp = sbp[0];
return 0;
}
/**
* init_nilfs - initialize a NILFS instance.
* @nilfs: the_nilfs structure
* @sb: super block
* @data: mount options
*
* init_nilfs() performs common initialization per block device (e.g.
* reading the super block, getting disk layout information, initializing
* shared fields in the_nilfs).
*
* The whole function runs with ns_sem held for writing.
*
* Return Value: On success, 0 is returned. On error, a negative error
* code is returned.
*/
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
{
struct nilfs_super_block *sbp;
int blocksize;
int err;
down_write(&nilfs->ns_sem);
/* first read the super blocks with a provisional block size */
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
printk(KERN_ERR "NILFS: unable to set blocksize\n");
err = -EINVAL;
goto out;
}
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
goto out;
err = nilfs_store_magic_and_option(sb, sbp, data);
if (err)
goto failed_sbh;
err = nilfs_check_feature_compatibility(sb, sbp);
if (err)
goto failed_sbh;
/* from here on use the block size recorded in the super block */
blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
if (blocksize < NILFS_MIN_BLOCK_SIZE ||
blocksize > NILFS_MAX_BLOCK_SIZE) {
printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
"filesystem blocksize %d\n", blocksize);
err = -EINVAL;
goto failed_sbh;
}
if (sb->s_blocksize != blocksize) {
int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
if (blocksize < hw_blocksize) {
printk(KERN_ERR
"NILFS: blocksize %d too small for device "
"(sector-size = %d).\n",
blocksize, hw_blocksize);
err = -EINVAL;
goto failed_sbh;
}
/* switch to the recorded block size and read the super blocks again */
nilfs_release_super_block(nilfs);
sb_set_blocksize(sb, blocksize);
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
goto out;
/* not failed_sbh; sbh is released automatically
when reloading fails. */
}
nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
nilfs->ns_blocksize = blocksize;
get_random_bytes(&nilfs->ns_next_generation,
sizeof(nilfs->ns_next_generation));
err = nilfs_store_disk_layout(nilfs, sbp);
if (err)
goto failed_sbh;
sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits);
nilfs->ns_mount_state = le16_to_cpu(sbp->s_state);
err = nilfs_store_log_cursor(nilfs, sbp);
if (err)
goto failed_sbh;
err = nilfs_sysfs_create_device_group(sb);
if (err)
goto failed_sbh;
/* mark the object initialized; destroy_nilfs() tests this flag */
set_nilfs_init(nilfs);
err = 0;
out:
up_write(&nilfs->ns_sem);
return err;
failed_sbh:
nilfs_release_super_block(nilfs);
goto out;
}
/**
 * nilfs_discard_segments - issue discard requests for a list of segments
 * @nilfs: the_nilfs structure
 * @segnump: array of segment numbers to discard
 * @nsegs: number of entries in @segnump
 *
 * Consecutive entries whose block ranges are adjacent are merged into a
 * single discard request.  When an entry is not adjacent to the pending
 * run, the pending run is discarded and a new run is started with that
 * entry, so every listed segment ends up in exactly one request.
 *
 * Return Value: 0 or the result of the last blkdev_issue_discard() call
 * on success; the first negative error code returned by
 * blkdev_issue_discard() otherwise.
 */
int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
			   size_t nsegs)
{
	sector_t seg_start, seg_end;
	sector_t start = 0, nblocks = 0;
	unsigned int sects_per_block;
	__u64 *sn;
	int ret = 0;

	sects_per_block = (1 << nilfs->ns_blocksize_bits) /
		bdev_logical_block_size(nilfs->ns_bdev);
	for (sn = segnump; sn < segnump + nsegs; sn++) {
		nilfs_get_segment_range(nilfs, *sn, &seg_start, &seg_end);

		if (nblocks && start + nblocks == seg_start) {
			/* Adjacent to the pending run: just extend it. */
			nblocks += seg_end - seg_start + 1;
			continue;
		}
		if (nblocks) {
			/* Flush the pending run before starting a new one. */
			ret = blkdev_issue_discard(nilfs->ns_bdev,
						   start * sects_per_block,
						   nblocks * sects_per_block,
						   GFP_NOFS, 0);
			if (ret < 0)
				return ret;
		}
		/*
		 * Start a new run with the current segment; dropping it here
		 * would leave this segment undiscarded.
		 */
		start = seg_start;
		nblocks = seg_end - seg_start + 1;
	}
	if (nblocks)
		ret = blkdev_issue_discard(nilfs->ns_bdev,
					   start * sects_per_block,
					   nblocks * sects_per_block,
					   GFP_NOFS, 0);
	return ret;
}
/**
 * nilfs_count_free_blocks - compute the number of blocks in clean segments
 * @nilfs: the_nilfs structure
 * @nblocks: place to store the result
 *
 * The clean segment count is sampled from the segment usage file while
 * the DAT metadata file's mi_sem is held for reading, and is then
 * multiplied by the number of blocks per segment.
 *
 * Return Value: always 0.
 */
int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
	struct rw_semaphore *dat_sem = &NILFS_MDT(nilfs->ns_dat)->mi_sem;
	unsigned long nclean;

	down_read(dat_sem);
	nclean = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
	up_read(dat_sem);

	*nblocks = (sector_t)nclean * nilfs->ns_blocks_per_segment;
	return 0;
}
/**
 * nilfs_near_disk_full - test whether clean segments are about to run out
 * @nilfs: the_nilfs structure
 *
 * Return Value: non-zero if the number of clean segments does not exceed
 * the number of reserved segments plus the number of segments estimated
 * from the current dirty block count (rounded up by one); 0 otherwise.
 */
int nilfs_near_disk_full(struct the_nilfs *nilfs)
{
	unsigned long nclean = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
	unsigned long threshold;

	threshold = nilfs->ns_nrsvsegs +
		(atomic_read(&nilfs->ns_ndirtyblks) /
		 nilfs->ns_blocks_per_segment + 1);

	return nclean <= threshold;
}
/**
 * nilfs_lookup_root - find the root object of a checkpoint
 * @nilfs: the_nilfs structure
 * @cno: checkpoint number to look up
 *
 * Searches @nilfs->ns_cptree under ns_cptree_lock.  On a hit the
 * reference count of the root object is incremented before the lock is
 * dropped.
 *
 * Return Value: the matching root object, or NULL if none is registered
 * for @cno.
 */
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno)
{
	struct nilfs_root *found = NULL;
	struct rb_node *node;

	spin_lock(&nilfs->ns_cptree_lock);
	node = nilfs->ns_cptree.rb_node;
	while (node) {
		struct nilfs_root *cur =
			rb_entry(node, struct nilfs_root, rb_node);

		if (cno == cur->cno) {
			atomic_inc(&cur->count);
			found = cur;
			break;
		}
		node = (cno < cur->cno) ? node->rb_left : node->rb_right;
	}
	spin_unlock(&nilfs->ns_cptree_lock);

	return found;
}
/**
 * nilfs_find_or_create_root - get the root object of a checkpoint
 * @nilfs: the_nilfs structure
 * @cno: checkpoint number
 *
 * Returns the root object registered for @cno with its reference count
 * incremented.  If none exists, a new one is allocated, inserted into
 * @nilfs->ns_cptree with a reference count of one, and its sysfs snapshot
 * group is created.
 *
 * Return Value: the root object, or NULL if the allocation or the sysfs
 * group creation failed.
 */
struct nilfs_root *
nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
	struct rb_node **p, *parent;
	struct nilfs_root *root, *new;
	int err;

	root = nilfs_lookup_root(nilfs, cno);
	if (root)
		return root;

	/* Allocate outside the spinlock; GFP_KERNEL may sleep. */
	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	spin_lock(&nilfs->ns_cptree_lock);

	p = &nilfs->ns_cptree.rb_node;
	parent = NULL;

	while (*p) {
		parent = *p;
		root = rb_entry(parent, struct nilfs_root, rb_node);

		if (cno < root->cno) {
			p = &(*p)->rb_left;
		} else if (cno > root->cno) {
			p = &(*p)->rb_right;
		} else {
			/* Somebody inserted the same cno in the meantime. */
			atomic_inc(&root->count);
			spin_unlock(&nilfs->ns_cptree_lock);
			kfree(new);
			return root;
		}
	}

	new->cno = cno;
	new->ifile = NULL;
	new->nilfs = nilfs;
	atomic_set(&new->count, 1);
	atomic64_set(&new->inodes_count, 0);
	atomic64_set(&new->blocks_count, 0);

	rb_link_node(&new->rb_node, parent, p);
	rb_insert_color(&new->rb_node, &nilfs->ns_cptree);

	spin_unlock(&nilfs->ns_cptree_lock);

	err = nilfs_sysfs_create_snapshot_group(new);
	if (err) {
		/*
		 * The node is already linked into the tree; unlink it before
		 * freeing so the tree never holds a pointer to freed memory.
		 * NOTE(review): a concurrent lookup may have taken a
		 * reference between the insertion above and this point --
		 * confirm whether callers can race here.
		 */
		spin_lock(&nilfs->ns_cptree_lock);
		rb_erase(&new->rb_node, &nilfs->ns_cptree);
		spin_unlock(&nilfs->ns_cptree_lock);
		kfree(new);
		new = NULL;
	}

	return new;
}
/**
 * nilfs_put_root - drop a reference to a root object
 * @root: root object to release
 *
 * When the last reference goes away the root object is unlinked from
 * the checkpoint tree, its sysfs snapshot group is deleted, its ifile
 * inode (if any) is released and the object is freed.
 *
 * The final decrement and the acquisition of ns_cptree_lock are done
 * atomically with atomic_dec_and_lock(), so nilfs_lookup_root() can never
 * find, and take a reference on, a root whose count has already dropped
 * to zero.
 */
void nilfs_put_root(struct nilfs_root *root)
{
	struct the_nilfs *nilfs = root->nilfs;

	if (atomic_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) {
		rb_erase(&root->rb_node, &nilfs->ns_cptree);
		spin_unlock(&nilfs->ns_cptree_lock);

		/* Tear down outside the spinlock, after unlinking. */
		nilfs_sysfs_delete_snapshot_group(root);
		if (root->ifile)
			iput(root->ifile);

		kfree(root);
	}
}

396
fs/nilfs2/the_nilfs.h Normal file
View file

@ -0,0 +1,396 @@
/*
* the_nilfs.h - the_nilfs shared structure.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#ifndef _THE_NILFS_H
#define _THE_NILFS_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
struct nilfs_sc_info;
struct nilfs_sysfs_dev_subgroups;
/*
* Bit numbers used with set_bit()/clear_bit()/test_bit() on
* the_nilfs::ns_flags (see THE_NILFS_FNS).
*/
enum {
THE_NILFS_INIT = 0, /* Information from super_block is set */
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
};
/**
* struct the_nilfs - struct to supervise multiple nilfs mount points
* @ns_flags: state flags (THE_NILFS_* bit numbers)
* @ns_flushed_device: flag indicating if all volatile data was flushed
* @ns_bdev: block device
* @ns_sem: semaphore for shared states
* @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
* @ns_sbh: buffer heads of on-disk super blocks
* @ns_sbp: pointers to super block data
* @ns_sbwtime: previous write time of super block
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
* @ns_sb_update_freq: interval of periodic superblock updates (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment
* @ns_nextnum: index number of the full segment to be used next
* @ns_pseg_offset: offset of next partial segment in the current full segment
* @ns_cno: next checkpoint number
* @ns_ctime: write time of the last segment
* @ns_nongc_ctime: write time of the last segment not for cleaner operation
* @ns_ndirtyblks: Number of dirty data blocks
* @ns_last_segment_lock: lock protecting fields for the latest segment
* @ns_last_pseg: start block number of the latest segment
* @ns_last_seq: sequence value of the latest segment
* @ns_last_cno: checkpoint number of the latest segment
* @ns_prot_seq: least sequence number of segments which must not be reclaimed
* @ns_prev_seq: base sequence number used to decide if advance log cursor
* @ns_writer: log writer
* @ns_segctor_sem: semaphore protecting log write
* @ns_dat: DAT file inode
* @ns_cpfile: checkpoint file inode
* @ns_sufile: segusage file inode
* @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
* @ns_cptree_lock: lock protecting @ns_cptree
* @ns_dirty_files: list of dirty files
* @ns_inode_lock: lock protecting @ns_dirty_files
* @ns_gc_inodes: dummy inodes to keep live blocks
* @ns_next_generation: next generation number for inodes
* @ns_next_gen_lock: lock protecting @ns_next_generation
* @ns_mount_opt: mount options
* @ns_resuid: uid for reserved blocks
* @ns_resgid: gid for reserved blocks
* @ns_interval: checkpoint creation interval
* @ns_watermark: watermark for the number of dirty buffers
* @ns_blocksize_bits: bit length of block size
* @ns_blocksize: block size
* @ns_nsegments: number of segments in filesystem
* @ns_blocks_per_segment: number of blocks per segment
* @ns_r_segments_percentage: reserved segments percentage
* @ns_nrsvsegs: number of reserved segments
* @ns_first_data_block: block number of first data block
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
* @ns_dev_kobj: /sys/fs/<nilfs>/<device>
* @ns_dev_kobj_unregister: completion state
* @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
int ns_flushed_device;
struct block_device *ns_bdev;
struct rw_semaphore ns_sem;
struct mutex ns_snapshot_mount_mutex;
/*
* used for
* - loading the latest checkpoint exclusively.
* - allocating a new full segment.
*
* NOTE(review): it is not evident from this header which of the
* neighbouring fields this comment refers to -- confirm.
*/
struct buffer_head *ns_sbh[2];
struct nilfs_super_block *ns_sbp[2];
time_t ns_sbwtime;
unsigned ns_sbwcount;
unsigned ns_sbsize;
unsigned ns_mount_state;
unsigned ns_sb_update_freq;
/*
* Following fields are dedicated to a writable FS-instance.
* Except for the period seeking checkpoint, code outside the segment
* constructor must lock a segment semaphore while accessing these
* fields.
* Only one writable FS-instance exists during the lifetime of
* the_nilfs.
*/
u64 ns_seg_seq;
__u64 ns_segnum;
__u64 ns_nextnum;
unsigned long ns_pseg_offset;
__u64 ns_cno;
time_t ns_ctime;
time_t ns_nongc_ctime;
atomic_t ns_ndirtyblks;
/*
* The following fields hold information on the latest partial segment
* written to disk with a super root. These fields are protected by
* ns_last_segment_lock.
*/
spinlock_t ns_last_segment_lock;
sector_t ns_last_pseg;
u64 ns_last_seq;
__u64 ns_last_cno;
u64 ns_prot_seq;
u64 ns_prev_seq;
struct nilfs_sc_info *ns_writer;
struct rw_semaphore ns_segctor_sem;
/*
* Following fields are lock free except for the period before
* the_nilfs is initialized.
*/
struct inode *ns_dat;
struct inode *ns_cpfile;
struct inode *ns_sufile;
/* Checkpoint tree */
struct rb_root ns_cptree;
spinlock_t ns_cptree_lock;
/* Dirty inode list */
struct list_head ns_dirty_files;
spinlock_t ns_inode_lock;
/* GC inode list */
struct list_head ns_gc_inodes;
/* Inode allocator */
u32 ns_next_generation;
spinlock_t ns_next_gen_lock;
/* Mount options */
unsigned long ns_mount_opt;
uid_t ns_resuid;
gid_t ns_resgid;
unsigned long ns_interval;
unsigned long ns_watermark;
/* Disk layout information (static) */
unsigned int ns_blocksize_bits;
unsigned int ns_blocksize;
unsigned long ns_nsegments;
unsigned long ns_blocks_per_segment;
unsigned long ns_r_segments_percentage;
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
/* /sys/fs/<nilfs>/<device> */
struct kobject ns_dev_kobj;
struct completion ns_dev_kobj_unregister;
struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
/*
* THE_NILFS_FNS(bit, name) - generate accessors for one ns_flags bit
*
* Expands to three static inline helpers operating on bit
* THE_NILFS_<bit> of nilfs->ns_flags:
*   set_nilfs_<name>()   - set the bit (set_bit)
*   clear_nilfs_<name>() - clear the bit (clear_bit)
*   nilfs_<name>()       - return the result of test_bit
*/
#define THE_NILFS_FNS(bit, name) \
static inline void set_nilfs_##name(struct the_nilfs *nilfs) \
{ \
set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
} \
static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \
{ \
clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
} \
static inline int nilfs_##name(struct the_nilfs *nilfs) \
{ \
return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
}
/* Instantiate the accessors for every flag bit of the enum above. */
THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
/*
* Mount option operations
*
* All of these act on (nilfs)->ns_mount_opt; @opt and @mask are given
* without the NILFS_MOUNT_ prefix, which the macros paste on.
*   nilfs_clear_opt - clear the bits of NILFS_MOUNT_<opt>
*   nilfs_set_opt   - set the bits of NILFS_MOUNT_<opt>
*   nilfs_test_opt  - evaluate to the masked bits (non-zero if any is set)
*   nilfs_write_opt - clear the bits of NILFS_MOUNT_<mask>, then set
*                     NILFS_MOUNT_<opt>
*/
#define nilfs_clear_opt(nilfs, opt) \
do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
#define nilfs_set_opt(nilfs, opt) \
do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0)
#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
#define nilfs_write_opt(nilfs, mask, opt) \
do { (nilfs)->ns_mount_opt = \
(((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
NILFS_MOUNT_##opt); \
} while (0)
/**
* struct nilfs_root - nilfs root object
* @cno: checkpoint number (search key in the_nilfs::ns_cptree)
* @rb_node: red-black tree node linking this object into ns_cptree
* @count: refcount of this structure
* @nilfs: nilfs object
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
* @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
* @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
struct rb_node rb_node;
atomic_t count;
struct the_nilfs *nilfs;
struct inode *ifile;
atomic64_t inodes_count;
atomic64_t blocks_count;
/* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
struct kobject snapshot_kobj;
struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
#define NILFS_CPTREE_CURRENT_CNO 0
/* Minimum interval between periodic superblock updates (in seconds) */
#define NILFS_SB_FREQ 10
/*
 * nilfs_sb_need_update - test whether the super block is due for an update
 *
 * Returns non-zero when the current time in seconds is earlier than
 * ns_sbwtime, or later than ns_sbwtime plus ns_sb_update_freq.
 */
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
	u64 now = get_seconds();

	if (now < nilfs->ns_sbwtime)
		return 1;
	return now > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
/*
 * nilfs_sb_will_flip - examine the low four bits of the super block write count
 *
 * Returns 0 when the low four bits of ns_sbwcount equal 0x08 or 0x0F,
 * and 1 for every other value.
 */
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
{
	int low_nibble = nilfs->ns_sbwcount & 0x0FL;

	return !(low_nibble == 0x08 || low_nibble == 0x0F);
}
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
struct the_nilfs *alloc_nilfs(struct block_device *bdev);
void destroy_nilfs(struct the_nilfs *nilfs);
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
__u64 cno);
void nilfs_put_root(struct nilfs_root *root);
int nilfs_near_disk_full(struct the_nilfs *);
void nilfs_fall_back_super_block(struct the_nilfs *);
void nilfs_swap_super_block(struct the_nilfs *);
/* nilfs_get_root - take an additional reference by incrementing root->count */
static inline void nilfs_get_root(struct nilfs_root *root)
{
atomic_inc(&root->count);
}
/*
 * nilfs_valid_fs - read the NILFS_VALID_FS bit of the mount state
 *
 * Samples ns_mount_state with ns_sem held for reading and returns the
 * masked value (non-zero if NILFS_VALID_FS is set).
 */
static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
{
	unsigned int state;

	down_read(&nilfs->ns_sem);
	state = nilfs->ns_mount_state & NILFS_VALID_FS;
	up_read(&nilfs->ns_sem);

	return state;
}
/*
 * nilfs_get_segment_range - compute the first and last block of a segment
 *
 * The end block is always derived from segnum * ns_blocks_per_segment.
 * For segment 0 the start block is ns_first_data_block instead of 0.
 */
static inline void
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
			sector_t *seg_start, sector_t *seg_end)
{
	sector_t base = (sector_t)nilfs->ns_blocks_per_segment * segnum;

	*seg_end = base + nilfs->ns_blocks_per_segment - 1;
	if (segnum == 0)
		*seg_start = nilfs->ns_first_data_block;
	else
		*seg_start = base;
}
/*
 * nilfs_get_segment_start_blocknr - return the first block of a segment
 *
 * Segment 0 starts at ns_first_data_block; every other segment starts
 * at segnum * ns_blocks_per_segment.
 */
static inline sector_t
nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum)
{
	if (segnum == 0)
		return nilfs->ns_first_data_block;

	return (sector_t)nilfs->ns_blocks_per_segment * segnum;
}
/*
 * nilfs_get_segnum_of_block - return the segment number containing a block
 *
 * Divides blocknr by ns_blocks_per_segment with sector_div(), which
 * leaves the quotient in its first argument.
 */
static inline __u64
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
{
	sector_t quotient = blocknr;

	sector_div(quotient, nilfs->ns_blocks_per_segment);
	return quotient;
}
/*
* nilfs_terminate_segment - mark the current full segment as used up
*
* Sets ns_pseg_offset to the number of blocks in [seg_start, seg_end].
*/
static inline void
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
sector_t seg_end)
{
/* terminate the current full segment (used in case of I/O-error) */
nilfs->ns_pseg_offset = seg_end - seg_start + 1;
}
/*
* nilfs_shift_to_next_segment - advance the log cursor by one full segment
*
* Makes ns_nextnum the current segment, resets the partial segment
* offset to 0 and increments the segment sequence counter.
*/
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
{
/* move forward with a full segment */
nilfs->ns_segnum = nilfs->ns_nextnum;
nilfs->ns_pseg_offset = 0;
nilfs->ns_seg_seq++;
}
/*
 * nilfs_last_cno - read ns_last_cno under ns_last_segment_lock
 */
static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
{
	__u64 latest;

	spin_lock(&nilfs->ns_last_segment_lock);
	latest = nilfs->ns_last_cno;
	spin_unlock(&nilfs->ns_last_segment_lock);

	return latest;
}
/*
 * nilfs_segment_is_active - test whether segment @n is the current segment
 * (ns_segnum) or the one to be used next (ns_nextnum)
 */
static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
{
	if (n == nilfs->ns_segnum)
		return 1;

	return n == nilfs->ns_nextnum;
}
/*
 * nilfs_flush_device - flush the block device cache if required
 *
 * Does nothing and returns 0 when the BARRIER mount option is off or
 * ns_flushed_device is already set.  Otherwise sets ns_flushed_device,
 * issues a flush and returns -EIO if the flush reported -EIO; every
 * other flush result is reported as 0.
 */
static inline int nilfs_flush_device(struct the_nilfs *nilfs)
{
	int ret = 0;

	if (nilfs_test_opt(nilfs, BARRIER) && !nilfs->ns_flushed_device) {
		nilfs->ns_flushed_device = 1;
		/*
		 * the store to ns_flushed_device must not be reordered after
		 * blkdev_issue_flush().
		 */
		smp_wmb();

		if (blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL, NULL) == -EIO)
			ret = -EIO;
	}
	return ret;
}
#endif /* _THE_NILFS_H */