Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

34
fs/gfs2/Kconfig Normal file
View file

@ -0,0 +1,34 @@
config GFS2_FS
tristate "GFS2 file system support"
depends on (64BIT || LBDAF)
select FS_POSIX_ACL
select CRC32
select QUOTACTL
help
A cluster filesystem.
Allows a cluster of computers to simultaneously use a block device
that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
and writes to the block device like a local filesystem, but also uses
a lock module to allow the computers coordinate their I/O so
filesystem consistency is maintained. One of the nifty features of
GFS is perfect consistency -- changes made to the filesystem on one
machine show up immediately on all other machines in the cluster.
To use the GFS2 filesystem in a cluster, you will need to enable
the locking module below. Documentation and utilities for GFS2 can
be found here: http://sources.redhat.com/cluster
The "nolock" lock module is now built in to GFS2 by default. If
you want to use the DLM, be sure to enable IPv4/6 networking.
config GFS2_FS_LOCKING_DLM
bool "GFS2 DLM locking"
depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
help
Multiple node locking module for GFS2
Most users of GFS2 will require this. It provides the locking
interface between GFS2 and the DLM, which is required to use GFS2
in a cluster environment.

10
fs/gfs2/Makefile Normal file
View file

@ -0,0 +1,10 @@
ccflags-y := -I$(src)
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
glops.o log.o lops.o main.o meta_io.o \
aops.o dentry.o export.o file.o \
ops_fstype.o inode.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o

121
fs/gfs2/acl.c Normal file
View file

@ -0,0 +1,121 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
#include "acl.h"
#include "xattr.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "trans.h"
#include "util.h"
static const char *gfs2_acl_name(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
return GFS2_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
return GFS2_POSIX_ACL_DEFAULT;
}
return NULL;
}
struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct posix_acl *acl;
const char *name;
char *data;
int len;
if (!ip->i_eattr)
return NULL;
name = gfs2_acl_name(type);
if (name == NULL)
return ERR_PTR(-EINVAL);
len = gfs2_xattr_acl_get(ip, name, &data);
if (len < 0)
return ERR_PTR(len);
if (len == 0)
return NULL;
acl = posix_acl_from_xattr(&init_user_ns, data, len);
kfree(data);
return acl;
}
int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
int error;
int len;
char *data;
const char *name = gfs2_acl_name(type);
BUG_ON(name == NULL);
if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
return -E2BIG;
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
error = posix_acl_equiv_mode(acl, &mode);
if (error < 0)
return error;
if (error == 0)
acl = NULL;
if (mode != inode->i_mode) {
inode->i_mode = mode;
mark_inode_dirty(inode);
}
}
if (acl) {
len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
if (len == 0)
return 0;
data = kmalloc(len, GFP_NOFS);
if (data == NULL)
return -ENOMEM;
error = posix_acl_to_xattr(&init_user_ns, acl, data, len);
if (error < 0)
goto out;
} else {
data = NULL;
len = 0;
}
error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
if (error)
goto out;
if (acl)
set_cached_acl(inode, type, acl);
else
forget_cached_acl(inode, type);
out:
kfree(data);
return error;
}

22
fs/gfs2/acl.h Normal file
View file

@ -0,0 +1,22 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __ACL_DOT_H__
#define __ACL_DOT_H__
#include "incore.h"
#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif /* __ACL_DOT_H__ */

1231
fs/gfs2/aops.c Normal file

File diff suppressed because it is too large Load diff

1495
fs/gfs2/bmap.c Normal file

File diff suppressed because it is too large Load diff

61
fs/gfs2/bmap.h Normal file
View file

@ -0,0 +1,61 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__
#include "inode.h"
struct inode;
struct gfs2_inode;
struct page;
/**
* gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
* @ip: the file
* @len: the number of bytes to be written to the file
* @data_blocks: returns the number of data blocks required
* @ind_blocks: returns the number of indirect blocks required
*
*/
static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
unsigned int len,
unsigned int *data_blocks,
unsigned int *ind_blocks)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
unsigned int tmp;
BUG_ON(gfs2_is_dir(ip));
*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
*ind_blocks = 3 * (sdp->sd_max_height - 1);
for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
*ind_blocks += tmp;
}
}
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh, int create);
extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
u64 *dblock, unsigned *extlen);
extern int gfs2_setattr_size(struct inode *inode, u64 size);
extern void gfs2_trim_blocks(struct inode *inode);
extern int gfs2_truncatei_resume(struct gfs2_inode *ip);
extern int gfs2_file_dealloc(struct gfs2_inode *ip);
extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
unsigned int len);
extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd);
extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd);
#endif /* __BMAP_DOT_H__ */

134
fs/gfs2/dentry.c Normal file
View file

@ -0,0 +1,134 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/namei.h>
#include <linux/crc32.h>
#include "gfs2.h"
#include "incore.h"
#include "dir.h"
#include "glock.h"
#include "super.h"
#include "util.h"
#include "inode.h"
/**
* gfs2_drevalidate - Check directory lookup consistency
* @dentry: the mapping to check
* @flags: lookup flags
*
* Check to make sure the lookup necessary to arrive at this inode from its
* parent is still good.
*
* Returns: 1 if the dentry is ok, 0 if it isn't
*/
static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
{
struct dentry *parent;
struct gfs2_sbd *sdp;
struct gfs2_inode *dip;
struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
int error;
int had_lock = 0;
if (flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
sdp = GFS2_SB(parent->d_inode);
dip = GFS2_I(parent->d_inode);
inode = dentry->d_inode;
if (inode) {
if (is_bad_inode(inode))
goto invalid;
ip = GFS2_I(inode);
}
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto valid;
had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
if (!had_lock) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
goto fail;
}
error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
switch (error) {
case 0:
if (!inode)
goto invalid_gunlock;
break;
case -ENOENT:
if (!inode)
goto valid_gunlock;
goto invalid_gunlock;
default:
goto fail_gunlock;
}
valid_gunlock:
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
valid:
dput(parent);
return 1;
invalid_gunlock:
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
invalid:
dput(parent);
return 0;
fail_gunlock:
gfs2_glock_dq_uninit(&d_gh);
fail:
dput(parent);
return 0;
}
static int gfs2_dhash(const struct dentry *dentry, struct qstr *str)
{
str->hash = gfs2_disk_hash(str->name, str->len);
return 0;
}
static int gfs2_dentry_delete(const struct dentry *dentry)
{
struct gfs2_inode *ginode;
if (!dentry->d_inode)
return 0;
ginode = GFS2_I(dentry->d_inode);
if (!ginode->i_iopen_gh.gh_gl)
return 0;
if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
return 1;
return 0;
}
const struct dentry_operations gfs2_dops = {
.d_revalidate = gfs2_drevalidate,
.d_hash = gfs2_dhash,
.d_delete = gfs2_dentry_delete,
};

2112
fs/gfs2/dir.c Normal file

File diff suppressed because it is too large Load diff

86
fs/gfs2/dir.h Normal file
View file

@ -0,0 +1,86 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__
#include <linux/dcache.h>
#include <linux/crc32.h>
struct inode;
struct gfs2_inode;
struct gfs2_inum;
struct buffer_head;
struct gfs2_dirent;
struct gfs2_diradd {
unsigned nr_blocks;
struct gfs2_dirent *dent;
struct buffer_head *bh;
int save_loc;
};
extern struct inode *gfs2_dir_search(struct inode *dir,
const struct qstr *filename,
bool fail_on_exist);
extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
const struct gfs2_inode *ip);
extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
const struct gfs2_inode *ip, struct gfs2_diradd *da);
static inline void gfs2_dir_no_add(struct gfs2_diradd *da)
{
if (da->bh)
brelse(da->bh);
da->bh = NULL;
}
extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
struct file_ra_state *f_ra);
extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
const struct gfs2_inode *nip, unsigned int new_type);
extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
extern int gfs2_diradd_alloc_required(struct inode *dir,
const struct qstr *filename,
struct gfs2_diradd *da);
extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head **bhp);
extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
static inline u32 gfs2_disk_hash(const char *data, int len)
{
return crc32_le((u32)~0, data, len) ^ (u32)~0;
}
static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
{
name->name = fname;
name->len = strlen(fname);
name->hash = gfs2_disk_hash(name->name, name->len);
}
/* N.B. This probably ought to take inum & type as args as well */
static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
{
dent->de_inum.no_addr = cpu_to_be64(0);
dent->de_inum.no_formal_ino = cpu_to_be64(0);
dent->de_hash = cpu_to_be32(name->hash);
dent->de_rec_len = cpu_to_be16(reclen);
dent->de_name_len = cpu_to_be16(name->len);
dent->de_type = cpu_to_be16(0);
memset(dent->__pad, 0, sizeof(dent->__pad));
memcpy(dent + 1, name->name, name->len);
}
extern struct qstr gfs2_qdot;
extern struct qstr gfs2_qdotdot;
#endif /* __DIR_DOT_H__ */

206
fs/gfs2/export.c Normal file
View file

@ -0,0 +1,206 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include "gfs2.h"
#include "incore.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "super.h"
#include "rgrp.h"
#include "util.h"
#define GFS2_SMALL_FH_SIZE 4
#define GFS2_LARGE_FH_SIZE 8
#define GFS2_OLD_FH_SIZE 10
static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
struct inode *parent)
{
__be32 *fh = (__force __be32 *)p;
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
if (parent && (*len < GFS2_LARGE_FH_SIZE)) {
*len = GFS2_LARGE_FH_SIZE;
return FILEID_INVALID;
} else if (*len < GFS2_SMALL_FH_SIZE) {
*len = GFS2_SMALL_FH_SIZE;
return FILEID_INVALID;
}
fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
*len = GFS2_SMALL_FH_SIZE;
if (!parent || inode == sb->s_root->d_inode)
return *len;
ip = GFS2_I(parent);
fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
*len = GFS2_LARGE_FH_SIZE;
return *len;
}
struct get_name_filldir {
struct dir_context ctx;
struct gfs2_inum_host inum;
char *name;
};
static int get_name_filldir(void *opaque, const char *name, int length,
loff_t offset, u64 inum, unsigned int type)
{
struct get_name_filldir *gnfd = opaque;
if (inum != gnfd->inum.no_addr)
return 0;
memcpy(gnfd->name, name, length);
gnfd->name[length] = 0;
return 1;
}
static int gfs2_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct inode *dir = parent->d_inode;
struct inode *inode = child->d_inode;
struct gfs2_inode *dip, *ip;
struct get_name_filldir gnfd = {
.ctx.actor = get_name_filldir,
.name = name
};
struct gfs2_holder gh;
int error;
struct file_ra_state f_ra = { .start = 0 };
if (!dir)
return -EINVAL;
if (!S_ISDIR(dir->i_mode) || !inode)
return -EINVAL;
dip = GFS2_I(dir);
ip = GFS2_I(inode);
*name = 0;
gnfd.inum.no_addr = ip->i_no_addr;
gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
if (error)
return error;
error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra);
gfs2_glock_dq_uninit(&gh);
if (!error && !*name)
error = -ENOENT;
return error;
}
static struct dentry *gfs2_get_parent(struct dentry *child)
{
return d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
}
static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_inum_host *inum)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
inode = gfs2_ilookup(sb, inum->no_addr, 0);
if (inode) {
if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
iput(inode);
return ERR_PTR(-ESTALE);
}
goto out_inode;
}
inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
GFS2_BLKST_DINODE);
if (IS_ERR(inode))
return ERR_CAST(inode);
out_inode:
return d_obtain_alias(inode);
}
static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct gfs2_inum_host this;
__be32 *fh = (__force __be32 *)fid->raw;
switch (fh_type) {
case GFS2_SMALL_FH_SIZE:
case GFS2_LARGE_FH_SIZE:
case GFS2_OLD_FH_SIZE:
if (fh_len < GFS2_SMALL_FH_SIZE)
return NULL;
this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
this.no_formal_ino |= be32_to_cpu(fh[1]);
this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
this.no_addr |= be32_to_cpu(fh[3]);
return gfs2_get_dentry(sb, &this);
default:
return NULL;
}
}
static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct gfs2_inum_host parent;
__be32 *fh = (__force __be32 *)fid->raw;
switch (fh_type) {
case GFS2_LARGE_FH_SIZE:
case GFS2_OLD_FH_SIZE:
if (fh_len < GFS2_LARGE_FH_SIZE)
return NULL;
parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
parent.no_formal_ino |= be32_to_cpu(fh[5]);
parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
parent.no_addr |= be32_to_cpu(fh[7]);
return gfs2_get_dentry(sb, &parent);
default:
return NULL;
}
}
const struct export_operations gfs2_export_ops = {
.encode_fh = gfs2_encode_fh,
.fh_to_dentry = gfs2_fh_to_dentry,
.fh_to_parent = gfs2_fh_to_parent,
.get_name = gfs2_get_name,
.get_parent = gfs2_get_parent,
};

1103
fs/gfs2/file.c Normal file

File diff suppressed because it is too large Load diff

26
fs/gfs2/gfs2.h Normal file
View file

@ -0,0 +1,26 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GFS2_DOT_H__
#define __GFS2_DOT_H__
enum {
NO_CREATE = 0,
CREATE = 1,
};
enum {
NO_FORCE = 0,
FORCE = 1,
};
#define GFS2_FAST_NAME_SIZE 8
#endif /* __GFS2_DOT_H__ */

2119
fs/gfs2/glock.c Normal file

File diff suppressed because it is too large Load diff

250
fs/gfs2/glock.h Normal file
View file

@ -0,0 +1,250 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GLOCK_DOT_H__
#define __GLOCK_DOT_H__
#include <linux/sched.h>
#include <linux/parser.h>
#include "incore.h"
/* Options for hostdata parser */
enum {
Opt_jid,
Opt_id,
Opt_first,
Opt_nodir,
Opt_err,
};
/*
* lm_lockname types
*/
#define LM_TYPE_RESERVED 0x00
#define LM_TYPE_NONDISK 0x01
#define LM_TYPE_INODE 0x02
#define LM_TYPE_RGRP 0x03
#define LM_TYPE_META 0x04
#define LM_TYPE_IOPEN 0x05
#define LM_TYPE_FLOCK 0x06
#define LM_TYPE_PLOCK 0x07
#define LM_TYPE_QUOTA 0x08
#define LM_TYPE_JOURNAL 0x09
/*
* lm_lock() states
*
* SHARED is compatible with SHARED, not with DEFERRED or EX.
* DEFERRED is compatible with DEFERRED, not with SHARED or EX.
*/
#define LM_ST_UNLOCKED 0
#define LM_ST_EXCLUSIVE 1
#define LM_ST_DEFERRED 2
#define LM_ST_SHARED 3
/*
* lm_lock() flags
*
* LM_FLAG_TRY
* Don't wait to acquire the lock if it can't be granted immediately.
*
* LM_FLAG_TRY_1CB
* Send one blocking callback if TRY is set and the lock is not granted.
*
* LM_FLAG_NOEXP
* GFS sets this flag on lock requests it makes while doing journal recovery.
* These special requests should not be blocked due to the recovery like
* ordinary locks would be.
*
* LM_FLAG_ANY
* A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
* also be granted in SHARED. The preferred state is whichever is compatible
* with other granted locks, or the specified state if no other locks exist.
*
* LM_FLAG_PRIORITY
* Override fairness considerations. Suppose a lock is held in a shared state
* and there is a pending request for the deferred state. A shared lock
* request with the priority flag would be allowed to bypass the deferred
* request and directly join the other shared lock. A shared lock request
* without the priority flag might be forced to wait until the deferred
* requested had acquired and released the lock.
*/
#define LM_FLAG_TRY 0x00000001
#define LM_FLAG_TRY_1CB 0x00000002
#define LM_FLAG_NOEXP 0x00000004
#define LM_FLAG_ANY 0x00000008
#define LM_FLAG_PRIORITY 0x00000010
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
#define GL_NOCACHE 0x00000400
/*
* lm_async_cb return flags
*
* LM_OUT_ST_MASK
* Masks the lower two bits of lock state in the returned value.
*
* LM_OUT_CANCELED
* The lock request was canceled.
*
*/
#define LM_OUT_ST_MASK 0x00000003
#define LM_OUT_CANCELED 0x00000008
#define LM_OUT_ERROR 0x00000004
/*
* lm_recovery_done() messages
*/
#define LM_RD_GAVEUP 308
#define LM_RD_SUCCESS 309
#define GLR_TRYFAILED 13
#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
#define GL_GLOCK_MIN_HOLD (long)(10)
#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
struct lm_lockops {
const char *lm_proto_name;
int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
void (*lm_first_done) (struct gfs2_sbd *sdp);
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
unsigned int result);
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
void (*lm_put_lock) (struct gfs2_glock *gl);
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags);
void (*lm_cancel) (struct gfs2_glock *gl);
const match_table_t *lm_tokens;
};
extern struct workqueue_struct *gfs2_delete_workqueue;
static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
struct pid *pid;
/* Look in glock's list of holders for one with current task as owner */
spin_lock(&gl->gl_spin);
pid = task_pid(current);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
break;
if (gh->gh_owner_pid == pid)
goto out;
}
gh = NULL;
out:
spin_unlock(&gl->gl_spin);
return gh;
}
static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
{
return gl->gl_state == LM_ST_EXCLUSIVE;
}
static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
{
return gl->gl_state == LM_ST_DEFERRED;
}
static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
{
return gl->gl_state == LM_ST_SHARED;
}
static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
{
if (gl->gl_ops->go_flags & GLOF_ASPACE)
return (struct address_space *)(gl + 1);
return NULL;
}
extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
extern void gfs2_glock_put(struct gfs2_glock *gl);
extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
unsigned flags, struct gfs2_holder *gh);
extern void gfs2_holder_reinit(unsigned int state, unsigned flags,
struct gfs2_holder *gh);
extern void gfs2_holder_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq(struct gfs2_holder *gh);
extern int gfs2_glock_poll(struct gfs2_holder *gh);
extern int gfs2_glock_wait(struct gfs2_holder *gh);
extern void gfs2_glock_dq(struct gfs2_holder *gh);
extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
unsigned int state, int flags,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
/**
* gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
* @gl: the glock
* @state: the state we're requesting
* @flags: the modifier flags
* @gh: the holder structure
*
* Returns: 0, GLR_*, or errno
*/
static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
unsigned int state, int flags,
struct gfs2_holder *gh)
{
int error;
gfs2_holder_init(gl, state, flags, gh);
error = gfs2_glock_nq(gh);
if (error)
gfs2_holder_uninit(gh);
return error;
}
extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
extern void gfs2_glock_free(struct gfs2_glock *gl);
extern int __init gfs2_glock_init(void);
extern void gfs2_glock_exit(void);
extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
extern int gfs2_register_debugfs(void);
extern void gfs2_unregister_debugfs(void);
extern const struct lm_lockops gfs2_dlm_ops;
#endif /* __GLOCK_DOT_H__ */

618
fs/gfs2/glops.c Normal file
View file

@ -0,0 +1,618 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/posix_acl.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "util.h"
#include "trans.h"
#include "dir.h"
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
bh->b_page->mapping, bh->b_page->flags);
fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
}
/**
* __gfs2_ail_flush - remove all buffers for a given lock from the AIL
* @gl: the glock
* @fsync: set when called from fsync (not all buffers will be clean)
*
* None of the buffers should be dirty, locked, or pinned.
*/
static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
unsigned int nr_revokes)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd, *tmp;
struct buffer_head *bh;
const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
if (nr_revokes == 0)
break;
bh = bd->bd_bh;
if (bh->b_state & b_state) {
if (fsync)
continue;
gfs2_ail_error(gl, bh);
}
gfs2_trans_add_revoke(sdp, bd);
nr_revokes--;
}
GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
}
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_trans tr;
memset(&tr, 0, sizeof(tr));
INIT_LIST_HEAD(&tr.tr_buf);
INIT_LIST_HEAD(&tr.tr_databuf);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
if (!tr.tr_revokes)
return;
/* A shortened, inline version of gfs2_trans_begin()
* tr->alloced is not set since the transaction structure is
* on the stack */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = _RET_IP_;
sb_start_intwrite(sdp->sd_vfs);
if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) {
sb_end_intwrite(sdp->sd_vfs);
return;
}
WARN_ON_ONCE(current->journal_info);
current->journal_info = &tr;
__gfs2_ail_flush(gl, 0, tr.tr_revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
unsigned int revokes = atomic_read(&gl->gl_ail_count);
unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
int ret;
if (!revokes)
return;
while (revokes > max_revokes)
max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
ret = gfs2_trans_begin(sdp, 0, max_revokes);
if (ret)
return;
__gfs2_ail_flush(gl, fsync, max_revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}
/**
* rgrp_go_sync - sync out the metadata for this glock
* @gl: the glock
*
* Called when demoting or unlocking an EX glock. We must flush
* to disk all dirty buffers/pages relating to this glock, and must not
* not return to caller to demote/unlock the glock until I/O is complete.
*/
static void rgrp_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd;
int error;
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
gfs2_log_flush(sdp, gl, NORMAL_FLUSH);
filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
mapping_set_error(mapping, error);
gfs2_ail_empty_gl(gl);
spin_lock(&gl->gl_spin);
rgd = gl->gl_object;
if (rgd)
gfs2_free_clones(rgd);
spin_unlock(&gl->gl_spin);
}
/**
* rgrp_go_inval - invalidate the metadata for this glock
* @gl: the glock
* @flags:
*
* We never used LM_ST_DEFERRED with resource groups, so that we
* should always see the metadata flag set here.
*
*/
static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct address_space *mapping = &sdp->sd_aspace;
WARN_ON_ONCE(!(flags & DIO_METADATA));
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
if (gl->gl_object) {
struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
}
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
*
*/
static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
struct address_space *metamapping = gfs2_glock2aspace(gl);
int error;
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
if (ip) {
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
inode_dio_wait(&ip->i_inode);
}
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH);
filemap_fdatawrite(metamapping);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
filemap_fdatawrite(mapping);
error = filemap_fdatawait(mapping);
mapping_set_error(mapping, error);
}
error = filemap_fdatawait(metamapping);
mapping_set_error(metamapping, error);
gfs2_ail_empty_gl(gl);
/*
* Writeback of the data mapping may cause the dirty flag to be set
* so we have to clear it again here.
*/
smp_mb__before_atomic();
clear_bit(GLF_DIRTY, &gl->gl_flags);
}
/**
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
*
* Normally we invalidate everything, but if we are moving into
* LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
* can keep hold of the metadata, since it won't have changed.
*
*/
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gl->gl_object;
gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
if (flags & DIO_METADATA) {
struct address_space *mapping = gfs2_glock2aspace(gl);
truncate_inode_pages(mapping, 0);
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
forget_all_cached_acls(&ip->i_inode);
gfs2_dir_hash_inval(ip);
}
}
if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH);
gl->gl_sbd->sd_rindex_uptodate = 0;
}
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
}
/**
* inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
* @gl: the glock
*
* Returns: 1 if it's ok
*/
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_holder *gh;
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
return 0;
if (!list_empty(&gl->gl_holders)) {
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
if (gh->gh_list.next != &gl->gl_holders)
return 0;
}
return 1;
}
/**
* gfs2_set_nlink - Set the inode's link count based on on-disk info
* @inode: The inode in question
* @nlink: The link count
*
* If the link count has hit zero, it must never be raised, whatever the
* on-disk inode might say. When new struct inodes are created the link
* count is set to 1, so that we can safely use this test even when reading
* in on disk information for the first time.
*/
static void gfs2_set_nlink(struct inode *inode, u32 nlink)
{
/*
* We will need to review setting the nlink count here in the
* light of the forthcoming ro bind mount work. This is a reminder
* to do that.
*/
if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
if (nlink == 0)
clear_nlink(inode);
else
set_nlink(inode, nlink);
}
}
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
const struct gfs2_dinode *str = buf;
struct timespec atime;
u16 height, depth;
if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
goto corrupt;
ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
ip->i_inode.i_rdev = 0;
switch (ip->i_inode.i_mode & S_IFMT) {
case S_IFBLK:
case S_IFCHR:
ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
be32_to_cpu(str->di_minor));
break;
};
i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
ip->i_inode.i_atime = atime;
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
ip->i_goal = be64_to_cpu(str->di_goal_meta);
ip->i_generation = be64_to_cpu(str->di_generation);
ip->i_diskflags = be32_to_cpu(str->di_flags);
ip->i_eattr = be64_to_cpu(str->di_eattr);
/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(&ip->i_inode);
height = be16_to_cpu(str->di_height);
if (unlikely(height > GFS2_MAX_META_HEIGHT))
goto corrupt;
ip->i_height = (u8)height;
depth = be16_to_cpu(str->di_depth);
if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
goto corrupt;
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
return 0;
corrupt:
gfs2_consist_inode(ip);
return -EIO;
}
/**
* gfs2_inode_refresh - Refresh the incore copy of the dinode
* @ip: The GFS2 inode
*
* Returns: errno
*/
int gfs2_inode_refresh(struct gfs2_inode *ip)
{
struct buffer_head *dibh;
int error;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
return error;
error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
clear_bit(GIF_INVALID, &ip->i_flags);
return error;
}
/**
* inode_go_lock - operation done after an inode lock is locked by a process
* @gl: the glock
* @flags:
*
* Returns: errno
*/
static int inode_go_lock(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
if (!ip || (gh->gh_flags & GL_SKIP))
return 0;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
if (error)
return error;
}
if (gh->gh_state != LM_ST_DEFERRED)
inode_dio_wait(&ip->i_inode);
if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
(gh->gh_state == LM_ST_EXCLUSIVE)) {
spin_lock(&sdp->sd_trunc_lock);
if (list_empty(&ip->i_trunc_list))
list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
return 1;
}
return error;
}
/**
* inode_go_dump - print information about an inode
* @seq: The iterator
* @ip: the inode
*
*/
static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
const struct gfs2_inode *ip = gl->gl_object;
if (ip == NULL)
return;
gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
(unsigned int)ip->i_diskflags,
(unsigned long long)i_size_read(&ip->i_inode));
}
/**
* freeze_go_sync - promote/demote the freeze glock
* @gl: the glock
* @state: the requested state
* @flags:
*
*/
static void freeze_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
DEFINE_WAIT(wait);
if (gl->gl_state == LM_ST_SHARED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
atomic_set(&sdp->sd_log_freeze, 1);
wake_up(&sdp->sd_logd_waitq);
do {
prepare_to_wait(&sdp->sd_log_frozen_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (atomic_read(&sdp->sd_log_freeze))
io_schedule();
} while(atomic_read(&sdp->sd_log_freeze));
finish_wait(&sdp->sd_log_frozen_wait, &wait);
}
}
/**
* freeze_go_xmote_bh - After promoting/demoting the freeze glock
* @gl: the glock
*
*/
static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
struct gfs2_log_header_host head;
int error;
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (error)
gfs2_consist(sdp);
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
gfs2_consist(sdp);
/* Initialize some head of the log stuff */
if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
}
return 0;
}
/**
* trans_go_demote_ok
* @gl: the glock
*
* Always returns 0
*/
static int freeze_go_demote_ok(const struct gfs2_glock *gl)
{
return 0;
}
/**
* iopen_go_callback - schedule the dcache entry for the inode to be deleted
* @gl: the glock
*
* gl_spin lock is held while calling this
*/
static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
struct gfs2_sbd *sdp = gl->gl_sbd;
if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
return;
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
gl->gl_state == LM_ST_SHARED && ip) {
gl->gl_lockref.count++;
if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
gl->gl_lockref.count--;
}
}
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_type = LM_TYPE_META,
};
const struct gfs2_glock_operations gfs2_inode_glops = {
.go_sync = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
.go_flags = GLOF_ASPACE,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_sync = rgrp_go_sync,
.go_inval = rgrp_go_inval,
.go_lock = gfs2_rgrp_go_lock,
.go_unlock = gfs2_rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
.go_flags = GLOF_LVB,
};
const struct gfs2_glock_operations gfs2_freeze_glops = {
.go_sync = freeze_go_sync,
.go_xmote_bh = freeze_go_xmote_bh,
.go_demote_ok = freeze_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
.go_callback = iopen_go_callback,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_type = LM_TYPE_FLOCK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_type = LM_TYPE_QUOTA,
.go_flags = GLOF_LVB,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
};
const struct gfs2_glock_operations *gfs2_glops_list[] = {
[LM_TYPE_META] = &gfs2_meta_glops,
[LM_TYPE_INODE] = &gfs2_inode_glops,
[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
[LM_TYPE_FLOCK] = &gfs2_flock_glops,
[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
[LM_TYPE_QUOTA] = &gfs2_quota_glops,
[LM_TYPE_JOURNAL] = &gfs2_journal_glops,
};

28
fs/gfs2/glops.h Normal file
View file

@ -0,0 +1,28 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GLOPS_DOT_H__
#define __GLOPS_DOT_H__
#include "incore.h"
extern const struct gfs2_glock_operations gfs2_meta_glops;
extern const struct gfs2_glock_operations gfs2_inode_glops;
extern const struct gfs2_glock_operations gfs2_rgrp_glops;
extern const struct gfs2_glock_operations gfs2_freeze_glops;
extern const struct gfs2_glock_operations gfs2_iopen_glops;
extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];
extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
#endif /* __GLOPS_DOT_H__ */

834
fs/gfs2/incore.h Normal file
View file

@ -0,0 +1,834 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __INCORE_DOT_H__
#define __INCORE_DOT_H__
#include <linux/fs.h>
#include <linux/kobject.h>
#include <linux/workqueue.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/completion.h>
#include <linux/rbtree.h>
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/lockref.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
struct gfs2_log_operations;
struct gfs2_bufdata;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
struct gfs2_jdesc;
struct gfs2_sbd;
struct lm_lockops;
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
struct gfs2_log_header_host {
u64 lh_sequence; /* Sequence number of this transaction */
u32 lh_flags; /* GFS2_LOG_HEAD_... */
u32 lh_tail; /* Block number of log tail */
u32 lh_blkno;
u32 lh_hash;
};
/*
* Structure of operations that are associated with each
* type of element in the log.
*/
struct gfs2_log_operations {
void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass);
int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass);
void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
const char *lo_name;
};
#define GBF_FULL 1
struct gfs2_bitmap {
struct buffer_head *bi_bh;
char *bi_clone;
unsigned long bi_flags;
u32 bi_offset;
u32 bi_start;
u32 bi_len;
u32 bi_blocks;
};
struct gfs2_rgrpd {
struct rb_node rd_node; /* Link with superblock */
struct gfs2_glock *rd_gl; /* Glock for this rgrp */
u64 rd_addr; /* grp block disk address */
u64 rd_data0; /* first data location */
u32 rd_length; /* length of rgrp header in fs blocks */
u32 rd_data; /* num of data blocks in rgrp */
u32 rd_bitbytes; /* number of bytes in data bitmaps */
u32 rd_free;
u32 rd_reserved; /* number of blocks reserved */
u32 rd_free_clone;
u32 rd_dinodes;
u64 rd_igeneration;
struct gfs2_bitmap *rd_bits;
struct gfs2_sbd *rd_sbd;
struct gfs2_rgrp_lvb *rd_rgl;
u32 rd_last_alloc;
u32 rd_flags;
u32 rd_extfail_pt; /* extent failure point */
#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */
#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */
#define GFS2_RDF_ERROR 0x40000000 /* error in rg */
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
struct rb_root rd_rstree; /* multi-block reservation tree */
};
struct gfs2_rbm {
struct gfs2_rgrpd *rgd;
u32 offset; /* The offset is bitmap relative */
int bii; /* Bitmap index */
};
static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
{
return rbm->rgd->rd_bits + rbm->bii;
}
static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
{
return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
rbm->offset;
}
static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
const struct gfs2_rbm *rbm2)
{
return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) &&
(rbm1->offset == rbm2->offset);
}
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
BH_Zeronew = BH_PrivateStart + 2,
};
BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)
BUFFER_FNS(Zeronew, zeronew)
TAS_BUFFER_FNS(Zeronew, zeronew)
struct gfs2_bufdata {
struct buffer_head *bd_bh;
struct gfs2_glock *bd_gl;
u64 bd_blkno;
struct list_head bd_list;
const struct gfs2_log_operations *bd_ops;
struct gfs2_trans *bd_tr;
struct list_head bd_ail_st_list;
struct list_head bd_ail_gl_list;
};
/*
* Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
* prefix of lock_dlm_ gets awkward.
*/
#define GDLM_STRNAME_BYTES 25
#define GDLM_LVB_SIZE 32
/*
* ls_recover_flags:
*
* DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
* held by failed nodes whose journals need recovery. Those locks should
* only be used for journal recovery until the journal recovery is done.
* This is set by the dlm recover_prep callback and cleared by the
* gfs2_control thread when journal recovery is complete. To avoid
* races between recover_prep setting and gfs2_control clearing, recover_spin
* is held while changing this bit and reading/writing recover_block
* and recover_start.
*
* DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
*
* DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
* recovery of all journals before allowing other nodes to mount the fs.
* This is cleared when FIRST_MOUNT_DONE is set.
*
* DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
* recovery of all journals, and now allows other nodes to mount the fs.
*
* DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
* BLOCK_LOCKS for the first time. The gfs2_control thread should now
* control clearing BLOCK_LOCKS for further recoveries.
*
* DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
*
* DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
* and recover_done(), i.e. set while recover_block == recover_start.
*/
enum {
DFL_BLOCK_LOCKS = 0,
DFL_NO_DLM_OPS = 1,
DFL_FIRST_MOUNT = 2,
DFL_FIRST_MOUNT_DONE = 3,
DFL_MOUNT_DONE = 4,
DFL_UNMOUNT = 5,
DFL_DLM_RECOVERY = 6,
};
struct lm_lockname {
u64 ln_number;
unsigned int ln_type;
};
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
((name1)->ln_type == (name2)->ln_type))
struct gfs2_glock_operations {
void (*go_sync) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
#define GLOF_ASPACE 1
#define GLOF_LVB 2
};
enum {
GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */
GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */
GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */
GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */
GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */
GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */
GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */
GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */
GFS2_NR_LKSTATS
};
struct gfs2_lkstats {
s64 stats[GFS2_NR_LKSTATS];
};
enum {
/* States */
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
HIF_FIRST = 7,
HIF_WAIT = 10,
};
struct gfs2_holder {
struct list_head gh_list;
struct gfs2_glock *gh_gl;
struct pid *gh_owner_pid;
unsigned int gh_state;
unsigned gh_flags;
int gh_error;
unsigned long gh_iflags; /* HIF_... */
unsigned long gh_ip;
};
/* Number of quota types we support */
#define GFS2_MAXQUOTAS 2
/* Resource group multi-block reservation, in order of appearance:
Step 1. Function prepares to write, allocates a mb, sets the size hint.
Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
Step 4. Bits are assigned from the rgrp based on either the reservation
or wherever it can.
*/
struct gfs2_blkreserv {
/* components used during write (step 1): */
atomic_t rs_sizehint; /* hint of the write size */
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node; /* link to other block reservations */
struct gfs2_rbm rs_rbm; /* Start of reservation */
u32 rs_free; /* how many blocks are still free */
u64 rs_inum; /* Inode number for reservation */
/* ancillary quota stuff */
struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS];
struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS];
unsigned int rs_qa_qd_num;
};
/*
* Allocation parameters
* @target: The number of blocks we'd ideally like to allocate
* @aflags: The flags (e.g. Orlov flag)
*
* The intent is to gradually expand this structure over time in
* order to give more information, e.g. alignment, min extent size
* to the allocation code.
*/
struct gfs2_alloc_parms {
u32 target;
u32 aflags;
};
enum {
GLF_LOCK = 1,
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
GLF_DIRTY = 6,
GLF_LFLUSH = 7,
GLF_INVALIDATE_IN_PROGRESS = 8,
GLF_REPLY_PENDING = 9,
GLF_INITIAL = 10,
GLF_FROZEN = 11,
GLF_QUEUED = 12,
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
};
struct gfs2_glock {
struct hlist_bl_node gl_list;
struct gfs2_sbd *gl_sbd;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
struct lockref gl_lockref;
#define gl_spin gl_lockref.lock
/* State fields protected by gl_spin */
unsigned int gl_state:2, /* Current state */
gl_target:2, /* Target state */
gl_demote_state:2, /* State requested by remote node */
gl_req:2, /* State in last dlm request */
gl_reply:8; /* Last reply from the dlm */
unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */
long gl_hold_time;
struct list_head gl_holders;
const struct gfs2_glock_operations *gl_ops;
ktime_t gl_dstamp;
struct gfs2_lkstats gl_stats;
struct dlm_lksb gl_lksb;
unsigned long gl_tchange;
void *gl_object;
struct list_head gl_lru;
struct list_head gl_ail_list;
atomic_t gl_ail_count;
atomic_t gl_revokes;
struct delayed_work gl_work;
union {
/* For inode and iopen glocks only */
struct work_struct gl_delete;
/* For rgrp glocks only */
struct {
loff_t start;
loff_t end;
} gl_vm;
};
struct rcu_head gl_rcu;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
enum {
GIF_INVALID = 0,
GIF_QD_LOCKED = 1,
GIF_ALLOC_FAILED = 2,
GIF_SW_PAGED = 3,
GIF_ORDERED = 4,
GIF_FREE_VFS_INODE = 5,
};
struct gfs2_inode {
struct inode i_inode;
u64 i_no_addr;
u64 i_no_formal_ino;
u64 i_generation;
u64 i_eattr;
unsigned long i_flags; /* GIF_... */
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
struct gfs2_rgrpd *i_rgd;
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
struct list_head i_ordered;
struct list_head i_trunc_list;
__be64 *i_hash_cache;
u32 i_entries;
u32 i_diskflags;
u8 i_height;
u8 i_depth;
};
/*
* Since i_inode is the first element of struct gfs2_inode,
* this is effectively a cast.
*/
static inline struct gfs2_inode *GFS2_I(struct inode *inode)
{
return container_of(inode, struct gfs2_inode, i_inode);
}
static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
{
return inode->i_sb->s_fs_info;
}
struct gfs2_file {
struct mutex f_fl_mutex;
struct gfs2_holder f_fl_gh;
};
struct gfs2_revoke_replay {
struct list_head rr_list;
u64 rr_blkno;
unsigned int rr_where;
};
enum {
QDF_CHANGE = 1,
QDF_LOCKED = 2,
QDF_REFRESH = 3,
};
struct gfs2_quota_data {
struct hlist_bl_node qd_hlist;
struct list_head qd_list;
struct kqid qd_id;
struct gfs2_sbd *qd_sbd;
struct lockref qd_lockref;
struct list_head qd_lru;
unsigned qd_hash;
unsigned long qd_flags; /* QDF_... */
s64 qd_change;
s64 qd_change_sync;
unsigned int qd_slot;
unsigned int qd_slot_count;
struct buffer_head *qd_bh;
struct gfs2_quota_change *qd_bh_qc;
unsigned int qd_bh_count;
struct gfs2_glock *qd_gl;
struct gfs2_quota_lvb qd_qb;
u64 qd_sync_gen;
unsigned long qd_last_warn;
struct rcu_head qd_rcu;
};
struct gfs2_trans {
unsigned long tr_ip;
unsigned int tr_blocks;
unsigned int tr_revokes;
unsigned int tr_reserved;
unsigned int tr_touched:1;
unsigned int tr_attached:1;
unsigned int tr_alloced:1;
unsigned int tr_num_buf_new;
unsigned int tr_num_databuf_new;
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
unsigned int tr_num_revoke_rm;
struct list_head tr_list;
struct list_head tr_databuf;
struct list_head tr_buf;
unsigned int tr_first;
struct list_head tr_ail1_list;
struct list_head tr_ail2_list;
};
struct gfs2_journal_extent {
struct list_head list;
unsigned int lblock; /* First logical block */
u64 dblock; /* First disk block */
u64 blocks;
};
struct gfs2_jdesc {
struct list_head jd_list;
struct list_head extent_list;
unsigned int nr_extents;
struct work_struct jd_work;
struct inode *jd_inode;
unsigned long jd_flags;
#define JDF_RECOVERY 1
unsigned int jd_jid;
unsigned int jd_blocks;
int jd_recover_error;
/* Replay stuff */
unsigned int jd_found_blocks;
unsigned int jd_found_revokes;
unsigned int jd_replayed_blocks;
struct list_head jd_revoke_list;
unsigned int jd_replay_tail;
};
struct gfs2_statfs_change_host {
s64 sc_total;
s64 sc_free;
s64 sc_dinodes;
};
#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF 0
#define GFS2_QUOTA_ACCOUNT 1
#define GFS2_QUOTA_ON 2
#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK 1
#define GFS2_DATA_ORDERED 2
#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
#define GFS2_ERRORS_WITHDRAW 0
#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
#define GFS2_ERRORS_RO 2 /* place holder for future feature */
#define GFS2_ERRORS_PANIC 3
struct gfs2_args {
char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
unsigned int ar_spectator:1; /* Don't get a journal */
unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */
unsigned int ar_debug:1; /* Oops on errors */
unsigned int ar_posix_acl:1; /* Enable posix acls */
unsigned int ar_quota:2; /* off/account/on */
unsigned int ar_suiddir:1; /* suiddir support */
unsigned int ar_data:2; /* ordered/writeback */
unsigned int ar_meta:1; /* mount metafs */
unsigned int ar_discard:1; /* discard requests */
unsigned int ar_errors:2; /* errors=withdraw | panic */
unsigned int ar_nobarrier:1; /* do not send barriers */
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
int ar_commit; /* Commit interval */
int ar_statfs_quantum; /* The fast statfs interval */
int ar_quota_quantum; /* The quota interval */
int ar_statfs_percent; /* The % change to force sync */
};
struct gfs2_tune {
spinlock_t gt_spin;
unsigned int gt_logd_secs;
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
unsigned int gt_quota_scale_num; /* Numerator */
unsigned int gt_quota_scale_den; /* Denominator */
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
unsigned int gt_new_files_jdata;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_complain_secs;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
};
enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
SDF_SHUTDOWN = 2,
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
SDF_NOJOURNALID = 6,
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
};
#define GFS2_FSNAME_LEN 256
struct gfs2_inum_host {
u64 no_formal_ino;
u64 no_addr;
};
struct gfs2_sb_host {
u32 sb_magic;
u32 sb_type;
u32 sb_format;
u32 sb_fs_format;
u32 sb_multihost_format;
u32 sb_bsize;
u32 sb_bsize_shift;
struct gfs2_inum_host sb_master_dir;
struct gfs2_inum_host sb_root_dir;
char sb_lockproto[GFS2_LOCKNAME_LEN];
char sb_locktable[GFS2_LOCKNAME_LEN];
};
/*
* lm_mount() return values
*
* ls_jid - the journal ID this node should use
* ls_first - this node is the first to mount the file system
* ls_lockspace - lock module's context for this file system
* ls_ops - lock module's functions
*/
struct lm_lockstruct {
int ls_jid;
unsigned int ls_first;
const struct lm_lockops *ls_ops;
dlm_lockspace_t *ls_dlm;
int ls_recover_jid_done; /* These two are deprecated, */
int ls_recover_jid_status; /* used previously by gfs_controld */
struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
struct dlm_lksb ls_control_lksb; /* control_lock */
char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
char *ls_lvb_bits;
spinlock_t ls_recover_spin; /* protects following fields */
unsigned long ls_recover_flags; /* DFL_ */
uint32_t ls_recover_mount; /* gen in first recover_done cb */
uint32_t ls_recover_start; /* gen in last recover_done cb */
uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
uint32_t ls_recover_size; /* size of recover_submit, recover_result */
uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
uint32_t *ls_recover_result; /* result of last jid recovery */
};
struct gfs2_pcpu_lkstats {
/* One struct for each glock type */
struct gfs2_lkstats lkstats[10];
};
struct gfs2_sbd {
struct super_block *sd_vfs;
struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
/* Constants computed on mount */
u32 sd_fsb2bb;
u32 sd_fsb2bb_shift;
u32 sd_diptrs; /* Number of pointers in a dinode */
u32 sd_inptrs; /* Number of pointers in a indirect block */
u32 sd_jbsize; /* Size of a journaled data block */
u32 sd_hash_bsize; /* sizeof(exhash block) */
u32 sd_hash_bsize_shift;
u32 sd_hash_ptrs; /* Number of pointers in a hash block */
u32 sd_qc_per_block;
u32 sd_blocks_per_bitmap;
u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
u32 sd_max_height; /* Max height of a file's metadata tree */
u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
u32 sd_max_jheight; /* Max height of journaled file's meta tree */
u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
struct gfs2_args sd_args; /* Mount arguments */
struct gfs2_tune sd_tune; /* Filesystem tuning structure */
/* Lock Stuff */
struct lm_lockstruct sd_lockstruct;
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_freeze_gl;
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
struct completion sd_wdack;
struct delayed_work sd_control_work;
/* Inode Stuff */
struct dentry *sd_master_dir;
struct dentry *sd_root_dir;
struct inode *sd_jindex;
struct inode *sd_statfs_inode;
struct inode *sd_sc_inode;
struct inode *sd_qc_inode;
struct inode *sd_rindex;
struct inode *sd_quota_inode;
/* StatFS stuff */
spinlock_t sd_statfs_spin;
struct gfs2_statfs_change_host sd_statfs_master;
struct gfs2_statfs_change_host sd_statfs_local;
int sd_statfs_force_sync;
/* Resource group stuff */
int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
/* Journal index stuff */
struct list_head sd_jindex_list;
spinlock_t sd_jindex_spin;
struct mutex sd_jindex_mutex;
unsigned int sd_journals;
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
struct gfs2_holder sd_jinode_gh;
struct gfs2_holder sd_sc_gh;
struct gfs2_holder sd_qc_gh;
struct completion sd_journal_ready;
/* Daemon stuff */
struct task_struct *sd_logd_process;
struct task_struct *sd_quotad_process;
/* Quota stuff */
struct list_head sd_quota_list;
atomic_t sd_quota_count;
struct mutex sd_quota_mutex;
struct mutex sd_quota_sync_mutex;
wait_queue_head_t sd_quota_wait;
struct list_head sd_trunc_list;
spinlock_t sd_trunc_lock;
unsigned int sd_quota_slots;
unsigned long *sd_quota_bitmap;
spinlock_t sd_bitmap_lock;
u64 sd_quota_sync_gen;
/* Log stuff */
struct address_space sd_aspace;
spinlock_t sd_log_lock;
struct gfs2_trans *sd_log_tr;
unsigned int sd_log_blks_reserved;
int sd_log_commited_revoke;
atomic_t sd_log_pinned;
unsigned int sd_log_num_revoke;
struct list_head sd_log_le_revoke;
struct list_head sd_log_le_ordered;
spinlock_t sd_ordered_lock;
atomic_t sd_log_thresh1;
atomic_t sd_log_thresh2;
atomic_t sd_log_blks_free;
wait_queue_head_t sd_log_waitq;
wait_queue_head_t sd_logd_waitq;
u64 sd_log_sequence;
unsigned int sd_log_head;
unsigned int sd_log_tail;
int sd_log_idle;
struct rw_semaphore sd_log_flush_lock;
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
int sd_log_error;
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
spinlock_t sd_ail_lock;
struct list_head sd_ail1_list;
struct list_head sd_ail2_list;
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
struct gfs2_holder sd_freeze_root_gh;
struct gfs2_holder sd_thaw_gh;
atomic_t sd_log_freeze;
atomic_t sd_frozen_root;
wait_queue_head_t sd_frozen_root_wait;
wait_queue_head_t sd_log_frozen_wait;
char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
char sd_proto_name[GFS2_FSNAME_LEN];
/* Debugging crud */
unsigned long sd_last_warning;
struct dentry *debugfs_dir; /* debugfs directory */
struct dentry *debugfs_dentry_glocks;
struct dentry *debugfs_dentry_glstats;
struct dentry *debugfs_dentry_sbstats;
};
static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
{
gl->gl_stats.stats[which]++;
}
static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
{
const struct gfs2_sbd *sdp = gl->gl_sbd;
preempt_disable();
this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
preempt_enable();
}
#endif /* __INCORE_DOT_H__ */

2006
fs/gfs2/inode.c Normal file

File diff suppressed because it is too large Load diff

141
fs/gfs2/inode.h Normal file
View file

@ -0,0 +1,141 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __INODE_DOT_H__
#define __INODE_DOT_H__
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include "util.h"
extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
extern int gfs2_internal_read(struct gfs2_inode *ip,
char *buf, loff_t *pos, unsigned size);
extern void gfs2_set_aops(struct inode *inode);
static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
{
return !ip->i_height;
}
static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
{
return ip->i_diskflags & GFS2_DIF_JDATA;
}
static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip);
}
static inline int gfs2_is_ordered(const struct gfs2_inode *ip)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip);
}
static inline int gfs2_is_dir(const struct gfs2_inode *ip)
{
return S_ISDIR(ip->i_inode.i_mode);
}
static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks)
{
inode->i_blocks = blocks <<
(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
}
static inline u64 gfs2_get_inode_blocks(const struct inode *inode)
{
return inode->i_blocks >>
(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
}
static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change)
{
gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks > -change));
change *= (GFS2_SB(inode)->sd_sb.sb_bsize/GFS2_BASIC_BLOCK);
inode->i_blocks += change;
}
static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
u64 no_formal_ino)
{
return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
}
static inline void gfs2_inum_out(const struct gfs2_inode *ip,
struct gfs2_dirent *dent)
{
dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
}
static inline int gfs2_check_internal_file_size(struct inode *inode,
u64 minsize, u64 maxsize)
{
u64 size = i_size_read(inode);
if (size < minsize || size > maxsize)
goto err;
if (size & ((1 << inode->i_blkbits) - 1))
goto err;
return 0;
err:
gfs2_consist_inode(GFS2_I(inode));
return -EIO;
}
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino,
int non_block);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino,
unsigned int blktype);
extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
extern int gfs2_permission(struct inode *inode, int mask);
extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
extern int gfs2_open_common(struct inode *inode, struct file *file);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
extern const struct file_operations gfs2_file_fops_nolock;
extern const struct file_operations gfs2_dir_fops_nolock;
extern void gfs2_set_inode_flags(struct inode *inode);
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
extern const struct file_operations gfs2_file_fops;
extern const struct file_operations gfs2_dir_fops;
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return sdp->sd_args.ar_localflocks;
}
#else /* Single node only */
#define gfs2_file_fops gfs2_file_fops_nolock
#define gfs2_dir_fops gfs2_dir_fops_nolock
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return 1;
}
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
#endif /* __INODE_DOT_H__ */

1336
fs/gfs2/lock_dlm.c Normal file

File diff suppressed because it is too large Load diff

950
fs/gfs2/log.c Normal file
View file

@ -0,0 +1,950 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"
/**
* gfs2_struct2blk - compute stuff
* @sdp: the filesystem
* @nstruct: the number of structures
* @ssize: the size of the structures
*
* Compute the number of log descriptor blocks needed to hold a certain number
* of structures of a certain size.
*
* Returns: the number of blocks needed (minimum is always 1)
*/
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
unsigned int ssize)
{
unsigned int blks;
unsigned int first, second;
blks = 1;
first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
if (nstruct > first) {
second = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header)) / ssize;
blks += DIV_ROUND_UP(nstruct - first, second);
}
return blks;
}
/**
* gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
* @mapping: The associated mapping (maybe NULL)
* @bd: The gfs2_bufdata to remove
*
* The ail lock _must_ be held when calling this function
*
*/
void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
bd->bd_tr = NULL;
list_del_init(&bd->bd_ail_st_list);
list_del_init(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bd->bd_bh);
}
/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
* @wbc: The writeback control structure
* @ai: The ail structure
*
*/
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
struct writeback_control *wbc,
struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
struct gfs2_glock *gl = NULL;
struct address_space *mapping;
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
bh = bd->bd_bh;
gfs2_assert(sdp, bd->bd_tr == tr);
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
continue;
}
if (!buffer_dirty(bh))
continue;
if (gl == bd->bd_gl)
continue;
gl = bd->bd_gl;
list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
mapping = bh->b_page->mapping;
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
generic_writepages(mapping, wbc);
spin_lock(&sdp->sd_ail_lock);
if (wbc->nr_to_write <= 0)
break;
return 1;
}
return 0;
}
/**
* gfs2_ail1_flush - start writeback of some ail1 entries
* @sdp: The super block
* @wbc: The writeback control structure
*
* Writes back some ail1 entries, according to the limits in the
* writeback control structure
*/
void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
struct list_head *head = &sdp->sd_ail1_list;
struct gfs2_trans *tr;
struct blk_plug plug;
trace_gfs2_ail_flush(sdp, wbc, 1);
blk_start_plug(&plug);
spin_lock(&sdp->sd_ail_lock);
restart:
list_for_each_entry_reverse(tr, head, tr_list) {
if (wbc->nr_to_write <= 0)
break;
if (gfs2_ail1_start_one(sdp, wbc, tr))
goto restart;
}
spin_unlock(&sdp->sd_ail_lock);
blk_finish_plug(&plug);
trace_gfs2_ail_flush(sdp, wbc, 0);
}
/**
* gfs2_ail1_start - start writeback of all ail1 entries
* @sdp: The superblock
*/
static void gfs2_ail1_start(struct gfs2_sbd *sdp)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
};
return gfs2_ail1_flush(sdp, &wbc);
}
/**
* gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
* @ai: the AIL entry
*
*/
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
bd_ail_st_list) {
bh = bd->bd_bh;
gfs2_assert(sdp, bd->bd_tr == tr);
if (buffer_busy(bh))
continue;
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}
}
/**
* gfs2_ail1_empty - Try to empty the ail1 lists
* @sdp: The superblock
*
* Tries to empty the ail1 lists, starting with the oldest first
*/
static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr, *s;
int oldest_tr = 1;
int ret;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
gfs2_ail1_empty_one(sdp, tr);
if (list_empty(&tr->tr_ail1_list) && oldest_tr)
list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
oldest_tr = 0;
}
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
return ret;
}
static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
bh = bd->bd_bh;
if (!buffer_locked(bh))
continue;
get_bh(bh);
spin_unlock(&sdp->sd_ail_lock);
wait_on_buffer(bh);
brelse(bh);
return;
}
}
spin_unlock(&sdp->sd_ail_lock);
}
/**
* gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
* @ai: the AIL entry
*
*/
static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head = &tr->tr_ail2_list;
struct gfs2_bufdata *bd;
while (!list_empty(head)) {
bd = list_entry(head->prev, struct gfs2_bufdata,
bd_ail_st_list);
gfs2_assert(sdp, bd->bd_tr == tr);
gfs2_remove_from_ail(bd);
}
}
static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
struct gfs2_trans *tr, *safe;
unsigned int old_tail = sdp->sd_log_tail;
int wrap = (new_tail < old_tail);
int a, b, rm;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
a = (old_tail <= tr->tr_first);
b = (tr->tr_first < new_tail);
rm = (wrap) ? (a || b) : (a && b);
if (!rm)
continue;
gfs2_ail2_empty_one(sdp, tr);
list_del(&tr->tr_list);
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
kfree(tr);
}
spin_unlock(&sdp->sd_ail_lock);
}
/**
* gfs2_log_release - Release a given number of log blocks
* @sdp: The GFS2 superblock
* @blks: The number of blocks
*
*/
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
atomic_add(blks, &sdp->sd_log_blks_free);
trace_gfs2_log_blocks(sdp, blks);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
up_read(&sdp->sd_log_flush_lock);
}
/**
* gfs2_log_reserve - Make a log reservation
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
*
* Note that we never give out the last few blocks of the journal. Thats
* due to the fact that there is a small number of header blocks
* associated with each log flush. The exact number can't be known until
* flush time, so we ensure that we have just enough free blocks at all
* times to avoid running out during a log flush.
*
* We no longer flush the log here, instead we wake up logd to do that
* for us. To avoid the thundering herd and to ensure that we deal fairly
* with queued waiters, we use an exclusive wait. This means that when we
* get woken with enough journal space to get our reservation, we need to
* wake the next waiter on the list.
*
* Returns: errno
*/
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
unsigned wanted = blks + reserved_blks;
DEFINE_WAIT(wait);
int did_wait = 0;
unsigned int free_blocks;
if (gfs2_assert_warn(sdp, blks) ||
gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
return -EINVAL;
retry:
free_blocks = atomic_read(&sdp->sd_log_blks_free);
if (unlikely(free_blocks <= wanted)) {
do {
prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
TASK_UNINTERRUPTIBLE);
wake_up(&sdp->sd_logd_waitq);
did_wait = 1;
if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
io_schedule();
free_blocks = atomic_read(&sdp->sd_log_blks_free);
} while(free_blocks <= wanted);
finish_wait(&sdp->sd_log_waitq, &wait);
}
if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
free_blocks - blks) != free_blocks)
goto retry;
trace_gfs2_log_blocks(sdp, -blks);
/*
* If we waited, then so might others, wake them up _after_ we get
* our share of the log.
*/
if (unlikely(did_wait))
wake_up(&sdp->sd_log_waitq);
down_read(&sdp->sd_log_flush_lock);
if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
gfs2_log_release(sdp, blks);
return -EROFS;
}
return 0;
}
/**
* log_distance - Compute distance between two journal blocks
* @sdp: The GFS2 superblock
* @newer: The most recent journal block of the pair
* @older: The older journal block of the pair
*
* Compute the distance (in the journal direction) between two
* blocks in the journal
*
* Returns: the distance in blocks
*/
static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
unsigned int older)
{
int dist;
dist = newer - older;
if (dist < 0)
dist += sdp->sd_jdesc->jd_blocks;
return dist;
}
/**
* calc_reserved - Calculate the number of blocks to reserve when
* refunding a transaction's unused buffers.
* @sdp: The GFS2 superblock
*
* This is complex. We need to reserve room for all our currently used
* metadata buffers (e.g. normal file I/O rewriting file time stamps) and
* all our journaled data buffers for journaled files (e.g. files in the
* meta_fs like rindex, or files for which chattr +j was done.)
* If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
* will count it as free space (sd_log_blks_free) and corruption will follow.
*
* We can have metadata bufs and jdata bufs in the same journal. So each
* type gets its own log header, for which we need to reserve a block.
* In fact, each type has the potential for needing more than one header
* in cases where we have more buffers than will fit on a journal page.
* Metadata journal entries take up half the space of journaled buffer entries.
* Thus, metadata entries have buf_limit (502) and journaled buffers have
* databuf_limit (251) before they cause a wrap around.
*
* Also, we need to reserve blocks for revoke journal entries and one for an
* overall header for the lot.
*
* Returns: the number of blocks reserved
*/
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
unsigned int reserved = 0;
unsigned int mbuf;
unsigned int dbuf;
struct gfs2_trans *tr = sdp->sd_log_tr;
if (tr) {
mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
reserved = mbuf + dbuf;
/* Account for header blocks */
reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
}
if (sdp->sd_log_commited_revoke > 0)
reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
sizeof(u64));
/* One for the overall header */
if (reserved)
reserved++;
return reserved;
}
static unsigned int current_tail(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr;
unsigned int tail;
spin_lock(&sdp->sd_ail_lock);
if (list_empty(&sdp->sd_ail1_list)) {
tail = sdp->sd_log_head;
} else {
tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
tr_list);
tail = tr->tr_first;
}
spin_unlock(&sdp->sd_ail_lock);
return tail;
}
static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
ail2_empty(sdp, new_tail);
atomic_add(dist, &sdp->sd_log_blks_free);
trace_gfs2_log_blocks(sdp, dist);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_tail = new_tail;
}
static void log_flush_wait(struct gfs2_sbd *sdp)
{
DEFINE_WAIT(wait);
if (atomic_read(&sdp->sd_log_in_flight)) {
do {
prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (atomic_read(&sdp->sd_log_in_flight))
io_schedule();
} while(atomic_read(&sdp->sd_log_in_flight));
finish_wait(&sdp->sd_log_flush_wait, &wait);
}
}
static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct gfs2_inode *ipa, *ipb;
ipa = list_entry(a, struct gfs2_inode, i_ordered);
ipb = list_entry(b, struct gfs2_inode, i_ordered);
if (ipa->i_no_addr < ipb->i_no_addr)
return -1;
if (ipa->i_no_addr > ipb->i_no_addr)
return 1;
return 0;
}
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip;
LIST_HEAD(written);
spin_lock(&sdp->sd_ordered_lock);
list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
list_move(&ip->i_ordered, &written);
if (ip->i_inode.i_mapping->nrpages == 0)
continue;
spin_unlock(&sdp->sd_ordered_lock);
filemap_fdatawrite(ip->i_inode.i_mapping);
spin_lock(&sdp->sd_ordered_lock);
}
list_splice(&written, &sdp->sd_log_le_ordered);
spin_unlock(&sdp->sd_ordered_lock);
}
static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip;
spin_lock(&sdp->sd_ordered_lock);
while (!list_empty(&sdp->sd_log_le_ordered)) {
ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
list_del(&ip->i_ordered);
WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
if (ip->i_inode.i_mapping->nrpages == 0)
continue;
spin_unlock(&sdp->sd_ordered_lock);
filemap_fdatawait(ip->i_inode.i_mapping);
spin_lock(&sdp->sd_ordered_lock);
}
spin_unlock(&sdp->sd_ordered_lock);
}
void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
spin_lock(&sdp->sd_ordered_lock);
if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
list_del(&ip->i_ordered);
spin_unlock(&sdp->sd_ordered_lock);
}
void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct buffer_head *bh = bd->bd_bh;
struct gfs2_glock *gl = bd->bd_gl;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_remove_from_ail(bd); /* drops ref on bh */
bd->bd_bh = NULL;
bd->bd_ops = &gfs2_revoke_lops;
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
set_bit(GLF_LFLUSH, &gl->gl_flags);
list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
}
void gfs2_write_revokes(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr;
struct gfs2_bufdata *bd, *tmp;
int have_revokes = 0;
int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
gfs2_ail1_empty(sdp);
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
if (list_empty(&bd->bd_list)) {
have_revokes = 1;
goto done;
}
}
}
done:
spin_unlock(&sdp->sd_ail_lock);
if (have_revokes == 0)
return;
while (sdp->sd_log_num_revoke > max_revokes)
max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
max_revokes -= sdp->sd_log_num_revoke;
if (!sdp->sd_log_num_revoke) {
atomic_dec(&sdp->sd_log_blks_free);
/* If no blocks have been reserved, we need to also
* reserve a block for the header */
if (!sdp->sd_log_blks_reserved)
atomic_dec(&sdp->sd_log_blks_free);
}
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
if (max_revokes == 0)
goto out_of_blocks;
if (!list_empty(&bd->bd_list))
continue;
gfs2_add_revoke(sdp, bd);
max_revokes--;
}
}
out_of_blocks:
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
if (!sdp->sd_log_num_revoke) {
atomic_inc(&sdp->sd_log_blks_free);
if (!sdp->sd_log_blks_reserved)
atomic_inc(&sdp->sd_log_blks_free);
}
}
/**
* log_write_header - Get and initialize a journal header buffer
* @sdp: The GFS2 superblock
*
* Returns: the initialized log buffer descriptor
*/
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
int rw = WRITE_FLUSH_FUA | REQ_META;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
lh = page_address(page);
clear_page(lh);
tail = current_tail(sdp);
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
lh->lh_header.__pad0 = cpu_to_be64(0);
lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
lh->lh_flags = cpu_to_be32(flags);
lh->lh_tail = cpu_to_be32(tail);
lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
log_flush_wait(sdp);
rw = WRITE_SYNC | REQ_META | REQ_PRIO;
}
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
gfs2_log_write_page(sdp, page);
gfs2_log_flush_bio(sdp, rw);
log_flush_wait(sdp);
if (sdp->sd_log_tail != tail)
log_pull_tail(sdp, tail);
}
/**
* gfs2_log_flush - flush incore transaction(s)
* @sdp: the filesystem
* @gl: The glock structure to flush. If NULL, flush the whole incore log
*
*/
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
enum gfs2_flush_type type)
{
struct gfs2_trans *tr;
down_write(&sdp->sd_log_flush_lock);
/* Log might have been flushed while we waited for the flush lock */
if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
up_write(&sdp->sd_log_flush_lock);
return;
}
trace_gfs2_log_flush(sdp, 1);
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
tr = sdp->sd_log_tr;
if (tr) {
sdp->sd_log_tr = NULL;
INIT_LIST_HEAD(&tr->tr_ail1_list);
INIT_LIST_HEAD(&tr->tr_ail2_list);
tr->tr_first = sdp->sd_log_flush_head;
}
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
gfs2_log_flush_bio(sdp, WRITE);
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
log_write_header(sdp, 0);
} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, 0);
}
lops_after_commit(sdp, tr);
gfs2_log_lock(sdp);
sdp->sd_log_head = sdp->sd_log_flush_head;
sdp->sd_log_blks_reserved = 0;
sdp->sd_log_commited_revoke = 0;
spin_lock(&sdp->sd_ail_lock);
if (tr && !list_empty(&tr->tr_ail1_list)) {
list_add(&tr->tr_list, &sdp->sd_ail1_list);
tr = NULL;
}
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
if (atomic_read(&sdp->sd_log_freeze))
type = FREEZE_FLUSH;
if (type != NORMAL_FLUSH) {
if (!sdp->sd_log_idle) {
for (;;) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
if (gfs2_ail1_empty(sdp))
break;
}
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
sdp->sd_log_flush_wrapped = 0;
log_write_header(sdp, 0);
sdp->sd_log_head = sdp->sd_log_flush_head;
}
if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH)
gfs2_log_shutdown(sdp);
if (type == FREEZE_FLUSH) {
int error;
atomic_set(&sdp->sd_log_freeze, 0);
wake_up(&sdp->sd_log_frozen_wait);
error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
LM_ST_SHARED, 0,
&sdp->sd_thaw_gh);
if (error) {
printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
gfs2_assert_withdraw(sdp, 0);
}
else
gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
}
}
trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
kfree(tr);
}
/**
* gfs2_merge_trans - Merge a new transaction into a cached transaction
* @old: Original transaction to be expanded
* @new: New transaction to be merged
*/
static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
{
WARN_ON_ONCE(old->tr_attached != 1);
old->tr_num_buf_new += new->tr_num_buf_new;
old->tr_num_databuf_new += new->tr_num_databuf_new;
old->tr_num_buf_rm += new->tr_num_buf_rm;
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
old->tr_num_revoke += new->tr_num_revoke;
old->tr_num_revoke_rm += new->tr_num_revoke_rm;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
}
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
unsigned int reserved;
unsigned int unused;
unsigned int maxres;
gfs2_log_lock(sdp);
if (sdp->sd_log_tr) {
gfs2_merge_trans(sdp->sd_log_tr, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
gfs2_assert_withdraw(sdp, tr->tr_alloced);
sdp->sd_log_tr = tr;
tr->tr_attached = 1;
}
sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
reserved = calc_reserved(sdp);
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
gfs2_assert_withdraw(sdp, maxres >= reserved);
unused = maxres - reserved;
atomic_add(unused, &sdp->sd_log_blks_free);
trace_gfs2_log_blocks(sdp, unused);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;
gfs2_log_unlock(sdp);
}
/**
* gfs2_log_commit - Commit a transaction to the log
* @sdp: the filesystem
* @tr: the transaction
*
* We wake up gfs2_logd if the number of pinned blocks exceed thresh1
* or the total number of used blocks (pinned blocks plus AIL blocks)
* is greater than thresh2.
*
* At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
* journal size.
*
* Returns: errno
*/
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
log_refund(sdp, tr);
if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
atomic_read(&sdp->sd_log_thresh2)))
wake_up(&sdp->sd_logd_waitq);
}
/**
* gfs2_log_shutdown - write a shutdown header into a journal
* @sdp: the filesystem
*
*/
void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
sdp->sd_log_head = sdp->sd_log_flush_head;
sdp->sd_log_tail = sdp->sd_log_head;
}
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze));
}
static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
}
/**
* gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
* @sdp: Pointer to GFS2 superblock
*
* Also, periodically check to make sure that we're using the most recent
* journal index.
*/
int gfs2_logd(void *data)
{
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
DEFINE_WAIT(wait);
while (!kthread_should_stop()) {
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}
if (gfs2_ail_flush_reqd(sdp)) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}
if (!gfs2_ail_flush_reqd(sdp))
wake_up(&sdp->sd_log_waitq);
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
try_to_freeze();
do {
prepare_to_wait(&sdp->sd_logd_waitq, &wait,
TASK_INTERRUPTIBLE);
if (!gfs2_ail_flush_reqd(sdp) &&
!gfs2_jrnl_flush_reqd(sdp) &&
!kthread_should_stop())
t = schedule_timeout(t);
} while(t && !gfs2_ail_flush_reqd(sdp) &&
!gfs2_jrnl_flush_reqd(sdp) &&
!kthread_should_stop());
finish_wait(&sdp->sd_logd_waitq, &wait);
}
return 0;
}

85
fs/gfs2/log.h Normal file
View file

@ -0,0 +1,85 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __LOG_DOT_H__
#define __LOG_DOT_H__
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/writeback.h>
#include "incore.h"
/**
* gfs2_log_lock - acquire the right to mess with the log manager
* @sdp: the filesystem
*
*/
static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
__acquires(&sdp->sd_log_lock)
{
spin_lock(&sdp->sd_log_lock);
}
/**
* gfs2_log_unlock - release the right to mess with the log manager
* @sdp: the filesystem
*
*/
static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
__releases(&sdp->sd_log_lock)
{
spin_unlock(&sdp->sd_log_lock);
}
static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
unsigned int value)
{
if (++value == sdp->sd_jdesc->jd_blocks) {
value = 0;
}
sdp->sd_log_head = sdp->sd_log_tail = value;
}
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
spin_lock(&sdp->sd_ordered_lock);
if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
spin_unlock(&sdp->sd_ordered_lock);
}
}
extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
unsigned int ssize);
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
enum gfs2_flush_type {
NORMAL_FLUSH = 0,
SYNC_FLUSH,
SHUTDOWN_FLUSH,
FREEZE_FLUSH
};
extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
enum gfs2_flush_type type);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_write_revokes(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */

886
fs/gfs2/lops.c Normal file
View file

@ -0,0 +1,886 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"
/**
* gfs2_pin - Pin a buffer in memory
* @sdp: The superblock
* @bh: The buffer to be pinned
*
* The log lock must be held when calling this function
*/
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
BUG_ON(!current->journal_info);
clear_buffer_dirty(bh);
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
bd = bh->b_private;
/* If this buffer is in the AIL and it has already been written
* to in-place disk block, remove it from the AIL.
*/
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr)
list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
spin_unlock(&sdp->sd_ail_lock);
get_bh(bh);
atomic_inc(&sdp->sd_log_pinned);
trace_gfs2_pin(bd, 1);
}
static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}
static void maybe_release_space(struct gfs2_bufdata *bd)
{
struct gfs2_glock *gl = bd->bd_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_rgrpd *rgd = gl->gl_object;
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
if (bi->bi_clone == NULL)
return;
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
memcpy(bi->bi_clone + bi->bi_offset,
bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
clear_bit(GBF_FULL, &bi->bi_flags);
rgd->rd_free_clone = rgd->rd_free;
rgd->rd_extfail_pt = rgd->rd_free;
}
/**
* gfs2_unpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @ai:
* @flags: The inode dirty flags
*
*/
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_trans *tr)
{
struct gfs2_bufdata *bd = bh->b_private;
BUG_ON(!buffer_uptodate(bh));
BUG_ON(!buffer_pinned(bh));
lock_buffer(bh);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
if (buffer_is_rgrp(bd))
maybe_release_space(bd);
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_tr = tr;
list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
trace_gfs2_pin(bd, 0);
unlock_buffer(bh);
atomic_dec(&sdp->sd_log_pinned);
}
static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
(sdp->sd_log_flush_head != sdp->sd_log_head));
if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
sdp->sd_log_flush_head = 0;
sdp->sd_log_flush_wrapped = 1;
}
}
static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
{
unsigned int lbn = sdp->sd_log_flush_head;
struct gfs2_journal_extent *je;
u64 block;
list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
block = je->dblock + lbn - je->lblock;
gfs2_log_incr_head(sdp);
return block;
}
}
return -1;
}
/**
* gfs2_end_log_write_bh - end log write of pagecache data with buffers
* @sdp: The superblock
* @bvec: The bio_vec
* @error: The i/o status
*
* This finds the relavent buffers and unlocks then and sets the
* error flag according to the status of the i/o request. This is
* used when the log is writing data which has an in-place version
* that is pinned in the pagecache.
*/
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
int error)
{
struct buffer_head *bh, *next;
struct page *page = bvec->bv_page;
unsigned size;
bh = page_buffers(page);
size = bvec->bv_len;
while (bh_offset(bh) < bvec->bv_offset)
bh = bh->b_this_page;
do {
if (error)
set_buffer_write_io_error(bh);
unlock_buffer(bh);
next = bh->b_this_page;
size -= bh->b_size;
brelse(bh);
bh = next;
} while(bh && size);
}
/**
* gfs2_end_log_write - end of i/o to the log
* @bio: The bio
* @error: Status of i/o request
*
* Each bio_vec contains either data from the pagecache or data
* relating to the log itself. Here we iterate over the bio_vec
* array, processing both kinds of data.
*
*/
static void gfs2_end_log_write(struct bio *bio, int error)
{
struct gfs2_sbd *sdp = bio->bi_private;
struct bio_vec *bvec;
struct page *page;
int i;
if (error) {
sdp->sd_log_error = error;
fs_err(sdp, "Error %d writing to log\n", error);
}
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
gfs2_end_log_write_bh(sdp, bvec, error);
else
mempool_free(page, gfs2_page_pool);
}
bio_put(bio);
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
* gfs2_log_flush_bio - Submit any pending log bio
* @sdp: The superblock
* @rw: The rw flags
*
* Submit any pending part-built or full bio to the block device. If
* there is no pending bio, then this is a no-op.
*/
void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
{
if (sdp->sd_log_bio) {
atomic_inc(&sdp->sd_log_in_flight);
submit_bio(rw, sdp->sd_log_bio);
sdp->sd_log_bio = NULL;
}
}
/**
* gfs2_log_alloc_bio - Allocate a new bio for log writing
* @sdp: The superblock
* @blkno: The next device block number we want to write to
*
* This should never be called when there is a cached bio in the
* super block. When it returns, there will be a cached bio in the
* super block which will have as many bio_vecs as the device is
* happy to handle.
*
* Returns: Newly allocated bio
*/
static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
{
struct super_block *sb = sdp->sd_vfs;
unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
struct bio *bio;
BUG_ON(sdp->sd_log_bio);
while (1) {
bio = bio_alloc(GFP_NOIO, nrvecs);
if (likely(bio))
break;
nrvecs = max(nrvecs/2, 1U);
}
bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio->bi_end_io = gfs2_end_log_write;
bio->bi_private = sdp;
sdp->sd_log_bio = bio;
return bio;
}
/**
* gfs2_log_get_bio - Get cached log bio, or allocate a new one
* @sdp: The superblock
* @blkno: The device block number we want to write to
*
* If there is a cached bio, then if the next block number is sequential
* with the previous one, return it, otherwise flush the bio to the
* device. If there is not a cached bio, or we just flushed it, then
* allocate a new one.
*
* Returns: The bio to use for log writes
*/
static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
{
struct bio *bio = sdp->sd_log_bio;
u64 nblk;
if (bio) {
nblk = bio_end_sector(bio);
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk)
return bio;
gfs2_log_flush_bio(sdp, WRITE);
}
return gfs2_log_alloc_bio(sdp, blkno);
}
/**
* gfs2_log_write - write to log
* @sdp: the filesystem
* @page: the page to write
* @size: the size of the data to write
* @offset: the offset within the page
*
* Try and add the page segment to the current bio. If that fails,
* submit the current bio to the device and create a new one, and
* then add the page segment to that.
*/
static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
unsigned size, unsigned offset)
{
u64 blkno = gfs2_log_bmap(sdp);
struct bio *bio;
int ret;
bio = gfs2_log_get_bio(sdp, blkno);
ret = bio_add_page(bio, page, size, offset);
if (ret == 0) {
gfs2_log_flush_bio(sdp, WRITE);
bio = gfs2_log_alloc_bio(sdp, blkno);
ret = bio_add_page(bio, page, size, offset);
WARN_ON(ret == 0);
}
}
/**
* gfs2_log_write_bh - write a buffer's content to the log
* @sdp: The super block
* @bh: The buffer pointing to the in-place location
*
* This writes the content of the buffer to the next available location
* in the log. The buffer will be unlocked once the i/o to the log has
* completed.
*/
static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
}
/**
* gfs2_log_write_page - write one block stored in a page, into the log
* @sdp: The superblock
* @page: The struct page
*
* This writes the first block-sized part of the page into the log. Note
* that the page must have been allocated from the gfs2_page_pool mempool
* and that after this has been called, ownership has been transferred and
* the page may be freed at any time.
*/
void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
struct super_block *sb = sdp->sd_vfs;
gfs2_log_write(sdp, page, sb->s_blocksize, 0);
}
static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
u32 ld_length, u32 ld_data1)
{
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
struct gfs2_log_descriptor *ld = page_address(page);
clear_page(ld);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(ld_type);
ld->ld_length = cpu_to_be32(ld_length);
ld->ld_data1 = cpu_to_be32(ld_data1);
ld->ld_data2 = 0;
return page;
}
static void gfs2_check_magic(struct buffer_head *bh)
{
void *kaddr;
__be32 *ptr;
clear_buffer_escaped(bh);
kaddr = kmap_atomic(bh->b_page);
ptr = kaddr + bh_offset(bh);
if (*ptr == cpu_to_be32(GFS2_MAGIC))
set_buffer_escaped(bh);
kunmap_atomic(kaddr);
}
static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct gfs2_bufdata *bda, *bdb;
bda = list_entry(a, struct gfs2_bufdata, bd_list);
bdb = list_entry(b, struct gfs2_bufdata, bd_list);
if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
return -1;
if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
return 1;
return 0;
}
static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
unsigned int total, struct list_head *blist,
bool is_databuf)
{
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
struct page *page;
unsigned int num;
unsigned n;
__be64 *ptr;
gfs2_log_lock(sdp);
list_sort(NULL, blist, blocknr_cmp);
bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
while(total) {
num = total;
if (total > limit)
num = limit;
gfs2_log_unlock(sdp);
page = gfs2_get_log_desc(sdp,
is_databuf ? GFS2_LOG_DESC_JDATA :
GFS2_LOG_DESC_METADATA, num + 1, num);
ld = page_address(page);
gfs2_log_lock(sdp);
ptr = (__be64 *)(ld + 1);
n = 0;
list_for_each_entry_continue(bd1, blist, bd_list) {
*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
if (is_databuf) {
gfs2_check_magic(bd1->bd_bh);
*ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
}
if (++n >= num)
break;
}
gfs2_log_unlock(sdp);
gfs2_log_write_page(sdp, page);
gfs2_log_lock(sdp);
n = 0;
list_for_each_entry_continue(bd2, blist, bd_list) {
get_bh(bd2->bd_bh);
gfs2_log_unlock(sdp);
lock_buffer(bd2->bd_bh);
if (buffer_escaped(bd2->bd_bh)) {
void *kaddr;
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
ptr = page_address(page);
kaddr = kmap_atomic(bd2->bd_bh->b_page);
memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
bd2->bd_bh->b_size);
kunmap_atomic(kaddr);
*(__be32 *)ptr = 0;
clear_buffer_escaped(bd2->bd_bh);
unlock_buffer(bd2->bd_bh);
brelse(bd2->bd_bh);
gfs2_log_write_page(sdp, page);
} else {
gfs2_log_write_bh(sdp, bd2->bd_bh);
}
gfs2_log_lock(sdp);
if (++n >= num)
break;
}
BUG_ON(total < num);
total -= num;
}
gfs2_log_unlock(sdp);
}
static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
unsigned int nbuf;
if (tr == NULL)
return;
nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
}
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head;
struct gfs2_bufdata *bd;
if (tr == NULL)
return;
head = &tr->tr_buf;
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gfs2_unpin(sdp, bd->bd_bh, tr);
}
}
static void buf_lo_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass)
{
if (pass != 0)
return;
jd->jd_found_blocks = 0;
jd->jd_replayed_blocks = 0;
}
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
unsigned int blks = be32_to_cpu(ld->ld_data1);
struct buffer_head *bh_log, *bh_ip;
u64 blkno;
int error = 0;
if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
return 0;
gfs2_replay_incr_blk(sdp, &start);
for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
blkno = be64_to_cpu(*ptr++);
jd->jd_found_blocks++;
if (gfs2_revoke_check(jd, blkno, start))
continue;
error = gfs2_replay_read_block(jd, start, &bh_log);
if (error)
return error;
bh_ip = gfs2_meta_new(gl, blkno);
memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
if (gfs2_meta_check(sdp, bh_ip))
error = -EIO;
else
mark_buffer_dirty(bh_ip);
brelse(bh_log);
brelse(bh_ip);
if (error)
break;
jd->jd_replayed_blocks++;
}
return error;
}
/**
* gfs2_meta_sync - Sync all buffers associated with a glock
* @gl: The glock
*
*/
static void gfs2_meta_sync(struct gfs2_glock *gl)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_sbd;
int error;
if (mapping == NULL)
mapping = &sdp->sd_aspace;
filemap_fdatawrite(mapping);
error = filemap_fdatawait(mapping);
if (error)
gfs2_io_error(gl->gl_sbd);
}
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (error) {
gfs2_meta_sync(ip->i_gl);
return;
}
if (pass != 1)
return;
gfs2_meta_sync(ip->i_gl);
fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}
static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct gfs2_meta_header *mh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_bufdata *bd;
struct page *page;
unsigned int length;
gfs2_write_revokes(sdp);
if (!sdp->sd_log_num_revoke)
return;
length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
offset = sizeof(struct gfs2_log_descriptor);
list_for_each_entry(bd, head, bd_list) {
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
gfs2_log_write_page(sdp, page);
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
mh = page_address(page);
clear_page(mh);
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
offset = sizeof(struct gfs2_meta_header);
}
*(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_log_write_page(sdp, page);
}
static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_bufdata *bd;
struct gfs2_glock *gl;
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gl = bd->bd_gl;
atomic_dec(&gl->gl_revokes);
clear_bit(GLF_LFLUSH, &gl->gl_flags);
kmem_cache_free(gfs2_bufdata_cachep, bd);
}
}
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass)
{
if (pass != 0)
return;
jd->jd_found_revokes = 0;
jd->jd_replay_tail = head->lh_tail;
}
static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
unsigned int blks = be32_to_cpu(ld->ld_length);
unsigned int revokes = be32_to_cpu(ld->ld_data1);
struct buffer_head *bh;
unsigned int offset;
u64 blkno;
int first = 1;
int error;
if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
return 0;
offset = sizeof(struct gfs2_log_descriptor);
for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
if (!first)
gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
error = gfs2_revoke_add(jd, blkno, start);
if (error < 0) {
brelse(bh);
return error;
}
else if (error)
jd->jd_found_revokes++;
if (!--revokes)
break;
offset += sizeof(u64);
}
brelse(bh);
offset = sizeof(struct gfs2_meta_header);
first = 0;
}
return 0;
}
static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (error) {
gfs2_revoke_clean(jd);
return;
}
if (pass != 1)
return;
fs_info(sdp, "jid=%u: Found %u revoke tags\n",
jd->jd_jid, jd->jd_found_revokes);
gfs2_revoke_clean(jd);
}
/**
* databuf_lo_before_commit - Scan the data buffers, writing as we go
*
*/
static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
unsigned int limit = databuf_limit(sdp);
unsigned int nbuf;
if (tr == NULL)
return;
nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
}
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld,
__be64 *ptr, int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
unsigned int blks = be32_to_cpu(ld->ld_data1);
struct buffer_head *bh_log, *bh_ip;
u64 blkno;
u64 esc;
int error = 0;
if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
return 0;
gfs2_replay_incr_blk(sdp, &start);
for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
blkno = be64_to_cpu(*ptr++);
esc = be64_to_cpu(*ptr++);
jd->jd_found_blocks++;
if (gfs2_revoke_check(jd, blkno, start))
continue;
error = gfs2_replay_read_block(jd, start, &bh_log);
if (error)
return error;
bh_ip = gfs2_meta_new(gl, blkno);
memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
/* Unescape */
if (esc) {
__be32 *eptr = (__be32 *)bh_ip->b_data;
*eptr = cpu_to_be32(GFS2_MAGIC);
}
mark_buffer_dirty(bh_ip);
brelse(bh_log);
brelse(bh_ip);
jd->jd_replayed_blocks++;
}
return error;
}
/* FIXME: sort out accounting for log blocks etc. */
static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (error) {
gfs2_meta_sync(ip->i_gl);
return;
}
if (pass != 1)
return;
/* data sync? */
gfs2_meta_sync(ip->i_gl);
fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head;
struct gfs2_bufdata *bd;
if (tr == NULL)
return;
head = &tr->tr_databuf;
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gfs2_unpin(sdp, bd->bd_bh, tr);
}
}
const struct gfs2_log_operations gfs2_buf_lops = {
.lo_before_commit = buf_lo_before_commit,
.lo_after_commit = buf_lo_after_commit,
.lo_before_scan = buf_lo_before_scan,
.lo_scan_elements = buf_lo_scan_elements,
.lo_after_scan = buf_lo_after_scan,
.lo_name = "buf",
};
const struct gfs2_log_operations gfs2_revoke_lops = {
.lo_before_commit = revoke_lo_before_commit,
.lo_after_commit = revoke_lo_after_commit,
.lo_before_scan = revoke_lo_before_scan,
.lo_scan_elements = revoke_lo_scan_elements,
.lo_after_scan = revoke_lo_after_scan,
.lo_name = "revoke",
};
const struct gfs2_log_operations gfs2_databuf_lops = {
.lo_before_commit = databuf_lo_before_commit,
.lo_after_commit = databuf_lo_after_commit,
.lo_scan_elements = databuf_lo_scan_elements,
.lo_after_scan = databuf_lo_after_scan,
.lo_name = "databuf",
};
const struct gfs2_log_operations *gfs2_log_ops[] = {
&gfs2_databuf_lops,
&gfs2_buf_lops,
&gfs2_revoke_lops,
NULL,
};

104
fs/gfs2/lops.h Normal file
View file

@ -0,0 +1,104 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __LOPS_DOT_H__
#define __LOPS_DOT_H__
#include <linux/list.h>
#include "incore.h"
#define BUF_OFFSET \
((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
~(sizeof(__be64) - 1))
#define DATABUF_OFFSET \
((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
~(2 * sizeof(__be64) - 1))
extern const struct gfs2_log_operations gfs2_glock_lops;
extern const struct gfs2_log_operations gfs2_buf_lops;
extern const struct gfs2_log_operations gfs2_revoke_lops;
extern const struct gfs2_log_operations gfs2_databuf_lops;
extern const struct gfs2_log_operations *gfs2_log_ops[];
extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
unsigned int limit;
limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
return limit;
}
static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
{
unsigned int limit;
limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
return limit;
}
static inline void lops_before_commit(struct gfs2_sbd *sdp,
struct gfs2_trans *tr)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_before_commit)
gfs2_log_ops[x]->lo_before_commit(sdp, tr);
}
static inline void lops_after_commit(struct gfs2_sbd *sdp,
struct gfs2_trans *tr)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_after_commit)
gfs2_log_ops[x]->lo_after_commit(sdp, tr);
}
static inline void lops_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head,
unsigned int pass)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_before_scan)
gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
}
static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld,
__be64 *ptr,
unsigned int pass)
{
int x, error;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_scan_elements) {
error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
ld, ptr, pass);
if (error)
return error;
}
return 0;
}
static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
unsigned int pass)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_before_scan)
gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
}
#endif /* __LOPS_DOT_H__ */

249
fs/gfs2/main.c Normal file
View file

@ -0,0 +1,249 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
#include <linux/mempool.h>
#include "gfs2.h"
#include "incore.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#include "glock.h"
#include "quota.h"
#include "recovery.h"
#include "dir.h"
struct workqueue_struct *gfs2_control_wq;
static void gfs2_init_inode_once(void *foo)
{
struct gfs2_inode *ip = foo;
inode_init_once(&ip->i_inode);
init_rwsem(&ip->i_rw_mutex);
INIT_LIST_HEAD(&ip->i_trunc_list);
ip->i_res = NULL;
ip->i_hash_cache = NULL;
}
static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
INIT_HLIST_BL_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_spin);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
atomic_set(&gl->gl_revokes, 0);
}
static void gfs2_init_gl_aspace_once(void *foo)
{
struct gfs2_glock *gl = foo;
struct address_space *mapping = (struct address_space *)(gl + 1);
gfs2_init_glock_once(gl);
address_space_init_once(mapping);
}
/**
* init_gfs2_fs - Register GFS2 as a filesystem
*
* Returns: 0 on success, error code on failure
*/
static int __init init_gfs2_fs(void)
{
int error;
gfs2_str2qstr(&gfs2_qdot, ".");
gfs2_str2qstr(&gfs2_qdotdot, "..");
gfs2_quota_hash_init();
error = gfs2_sys_init();
if (error)
return error;
error = list_lru_init(&gfs2_qd_lru);
if (error)
goto fail_lru;
error = gfs2_glock_init();
if (error)
goto fail;
error = -ENOMEM;
gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
sizeof(struct gfs2_glock),
0, 0,
gfs2_init_glock_once);
if (!gfs2_glock_cachep)
goto fail;
gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)",
sizeof(struct gfs2_glock) +
sizeof(struct address_space),
0, 0, gfs2_init_gl_aspace_once);
if (!gfs2_glock_aspace_cachep)
goto fail;
gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
sizeof(struct gfs2_inode),
0, SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD,
gfs2_init_inode_once);
if (!gfs2_inode_cachep)
goto fail;
gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
sizeof(struct gfs2_bufdata),
0, 0, NULL);
if (!gfs2_bufdata_cachep)
goto fail;
gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd",
sizeof(struct gfs2_rgrpd),
0, 0, NULL);
if (!gfs2_rgrpd_cachep)
goto fail;
gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
sizeof(struct gfs2_quota_data),
0, 0, NULL);
if (!gfs2_quotad_cachep)
goto fail;
gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk",
sizeof(struct gfs2_blkreserv),
0, 0, NULL);
if (!gfs2_rsrv_cachep)
goto fail;
register_shrinker(&gfs2_qd_shrinker);
error = register_filesystem(&gfs2_fs_type);
if (error)
goto fail;
error = register_filesystem(&gfs2meta_fs_type);
if (error)
goto fail_unregister;
error = -ENOMEM;
gfs_recovery_wq = alloc_workqueue("gfs_recovery",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
if (!gfs_recovery_wq)
goto fail_wq;
gfs2_control_wq = alloc_workqueue("gfs2_control",
WQ_UNBOUND | WQ_FREEZABLE, 0);
if (!gfs2_control_wq)
goto fail_recovery;
gfs2_page_pool = mempool_create_page_pool(64, 0);
if (!gfs2_page_pool)
goto fail_control;
gfs2_register_debugfs();
pr_info("GFS2 installed\n");
return 0;
fail_control:
destroy_workqueue(gfs2_control_wq);
fail_recovery:
destroy_workqueue(gfs_recovery_wq);
fail_wq:
unregister_filesystem(&gfs2meta_fs_type);
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
list_lru_destroy(&gfs2_qd_lru);
fail_lru:
unregister_shrinker(&gfs2_qd_shrinker);
gfs2_glock_exit();
if (gfs2_rsrv_cachep)
kmem_cache_destroy(gfs2_rsrv_cachep);
if (gfs2_quotad_cachep)
kmem_cache_destroy(gfs2_quotad_cachep);
if (gfs2_rgrpd_cachep)
kmem_cache_destroy(gfs2_rgrpd_cachep);
if (gfs2_bufdata_cachep)
kmem_cache_destroy(gfs2_bufdata_cachep);
if (gfs2_inode_cachep)
kmem_cache_destroy(gfs2_inode_cachep);
if (gfs2_glock_aspace_cachep)
kmem_cache_destroy(gfs2_glock_aspace_cachep);
if (gfs2_glock_cachep)
kmem_cache_destroy(gfs2_glock_cachep);
gfs2_sys_uninit();
return error;
}
/**
* exit_gfs2_fs - Unregister the file system
*
*/
static void __exit exit_gfs2_fs(void)
{
unregister_shrinker(&gfs2_qd_shrinker);
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
destroy_workqueue(gfs2_control_wq);
list_lru_destroy(&gfs2_qd_lru);
rcu_barrier();
mempool_destroy(gfs2_page_pool);
kmem_cache_destroy(gfs2_rsrv_cachep);
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
kmem_cache_destroy(gfs2_inode_cachep);
kmem_cache_destroy(gfs2_glock_aspace_cachep);
kmem_cache_destroy(gfs2_glock_cachep);
gfs2_sys_uninit();
}
MODULE_DESCRIPTION("Global File System");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
module_init(init_gfs2_fs);
module_exit(exit_gfs2_fs);

403
fs/gfs2/meta_io.c Normal file
View file

@ -0,0 +1,403 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"
static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
{
struct buffer_head *bh, *head;
int nr_underway = 0;
int write_op = REQ_META | REQ_PRIO |
(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh))
continue;
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* potentially cause a busy-wait loop from flusher thread and kswapd
* activity, but those code paths have their own higher-level
* throttling.
*/
if (wbc->sync_mode != WB_SYNC_NONE) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
if (test_clear_buffer_dirty(bh)) {
mark_buffer_async_write(bh);
} else {
unlock_buffer(bh);
}
} while ((bh = bh->b_this_page) != head);
/*
* The page and its buffers are protected by PageWriteback(), so we can
* drop the bh refcounts early.
*/
BUG_ON(PageWriteback(page));
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
submit_bh(write_op, bh);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
if (nr_underway == 0)
end_page_writeback(page);
return 0;
}
const struct address_space_operations gfs2_meta_aops = {
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
const struct address_space_operations gfs2_rgrp_aops = {
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
/**
* gfs2_getbuf - Get a buffer with a given address space
* @gl: the glock
* @blkno: the block number (filesystem scope)
* @create: 1 if the buffer should be created
*
* Returns: the buffer
*/
struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_sbd;
struct page *page;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
unsigned int bufnum;
if (mapping == NULL)
mapping = &sdp->sd_aspace;
shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
for (;;) {
page = grab_cache_page(mapping, index);
if (page)
break;
yield();
}
} else {
page = find_get_page_flags(mapping, index,
FGP_LOCK|FGP_ACCESSED);
if (!page)
return NULL;
}
if (!page_has_buffers(page))
create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
/* Locate header for our buffer within our page */
for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
/* Do nothing */;
get_bh(bh);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
unlock_page(page);
page_cache_release(page);
return bh;
}
static void meta_prep_new(struct buffer_head *bh)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
lock_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}
/**
* gfs2_meta_new - Get a block
* @gl: The glock associated with this block
* @blkno: The block number
*
* Returns: The buffer
*/
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
struct buffer_head *bh;
bh = gfs2_getbuf(gl, blkno, CREATE);
meta_prep_new(bh);
return bh;
}
/**
* gfs2_meta_read - Read a block from disk
* @gl: The glock covering the block
* @blkno: The block number
* @flags: flags
* @bhp: the place where the buffer is returned (NULL on failure)
*
* Returns: errno
*/
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct buffer_head *bh;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
*bhp = NULL;
return -EIO;
}
*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return 0;
}
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
if (!(flags & DIO_WAIT))
return 0;
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh))) {
struct gfs2_trans *tr = current->journal_info;
if (tr && tr->tr_touched)
gfs2_io_error_bh(sdp, bh);
brelse(bh);
*bhp = NULL;
return -EIO;
}
return 0;
}
/**
* gfs2_meta_wait - Reread a block from disk
* @sdp: the filesystem
* @bh: The block to wait for
*
* Returns: errno
*/
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
struct gfs2_trans *tr = current->journal_info;
if (tr && tr->tr_touched)
gfs2_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
return 0;
}
void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
{
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
int was_pinned = 0;
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
atomic_dec(&sdp->sd_log_pinned);
list_del_init(&bd->bd_list);
if (meta)
tr->tr_num_buf_rm++;
else
tr->tr_num_databuf_rm++;
tr->tr_touched = 1;
was_pinned = 1;
brelse(bh);
}
if (bd) {
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
} else if (was_pinned) {
bh->b_private = NULL;
kmem_cache_free(gfs2_bufdata_cachep, bd);
}
spin_unlock(&sdp->sd_ail_lock);
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
}
/**
* gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
* @ip: the inode who owns the buffers
* @bstart: the first buffer in the run
* @blen: the number of buffers in the run
*
*/
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh;
while (blen) {
bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
if (bh) {
lock_buffer(bh);
gfs2_log_lock(sdp);
gfs2_remove_from_journal(bh, current->journal_info, 1);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
}
bstart++;
blen--;
}
}
/**
* gfs2_meta_indirect_buffer - Get a metadata buffer
* @ip: The GFS2 inode
* @height: The level of this buf in the metadata (indir addr) tree (if any)
* @num: The block number (device relative) of the buffer
* @bhp: the buffer is returned here
*
* Returns: errno
*/
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_glock *gl = ip->i_gl;
struct buffer_head *bh;
int ret = 0;
u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
brelse(bh);
ret = -EIO;
}
*bhp = bh;
return ret;
}
/**
* gfs2_meta_ra - start readahead on an extent of a file
* @gl: the glock the blocks belong to
* @dblock: the starting disk block
* @extlen: the number of blocks in the extent
*
* returns: the first buffer in the extent
*/
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct buffer_head *first_bh, *bh;
u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
sdp->sd_sb.sb_bsize_shift;
BUG_ON(!extlen);
if (max_ra < 1)
max_ra = 1;
if (extlen > max_ra)
extlen = max_ra;
first_bh = gfs2_getbuf(gl, dblock, CREATE);
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
dblock++;
extlen--;
while (extlen) {
bh = gfs2_getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
ll_rw_block(READA | REQ_META, 1, &bh);
brelse(bh);
dblock++;
extlen--;
if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
goto out;
}
wait_on_buffer(first_bh);
out:
return first_bh;
}

78
fs/gfs2/meta_io.h Normal file
View file

@ -0,0 +1,78 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __DIO_DOT_H__
#define __DIO_DOT_H__
#include <linux/buffer_head.h>
#include <linux/string.h>
#include "incore.h"
static inline void gfs2_buffer_clear(struct buffer_head *bh)
{
memset(bh->b_data, 0, bh->b_size);
}
static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
{
BUG_ON(head > bh->b_size);
memset(bh->b_data + head, 0, bh->b_size - head);
}
static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
int to_head,
struct buffer_head *from_bh,
int from_head)
{
BUG_ON(from_head < to_head);
memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head,
from_bh->b_size - from_head);
memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
0, from_head - to_head);
}
extern const struct address_space_operations gfs2_meta_aops;
extern const struct address_space_operations gfs2_rgrp_aops;
static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
{
struct inode *inode = mapping->host;
if (mapping->a_ops == &gfs2_meta_aops)
return (((struct gfs2_glock *)mapping) - 1)->gl_sbd;
else if (mapping->a_ops == &gfs2_rgrp_aops)
return container_of(mapping, struct gfs2_sbd, sd_aspace);
else
return inode->i_sb->s_fs_info;
}
extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head **bhp);
extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
int create);
extern void gfs2_remove_from_journal(struct buffer_head *bh,
struct gfs2_trans *tr, int meta);
extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head **bhp);
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
struct buffer_head **bhp)
{
return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, bhp);
}
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
#endif /* __DIO_DOT_H__ */

1417
fs/gfs2/ops_fstype.c Normal file

File diff suppressed because it is too large Load diff

1656
fs/gfs2/quota.c Normal file

File diff suppressed because it is too large Load diff

62
fs/gfs2/quota.h Normal file
View file

@ -0,0 +1,62 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __QUOTA_DOT_H__
#define __QUOTA_DOT_H__
#include <linux/list_lru.h>
struct gfs2_inode;
struct gfs2_sbd;
#define NO_UID_QUOTA_CHANGE INVALID_UID
#define NO_GID_QUOTA_CHANGE INVALID_GID
extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unhold(struct gfs2_inode *ip);
extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unlock(struct gfs2_inode *ip);
extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
kuid_t uid, kgid_t gid);
extern int gfs2_quota_sync(struct super_block *sb, int type);
extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
extern int gfs2_quota_init(struct gfs2_sbd *sdp);
extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
extern int gfs2_quotad(void *data);
extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int ret;
if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
return 0;
ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (ret)
return ret;
if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (ret)
gfs2_quota_unlock(ip);
return ret;
}
extern const struct quotactl_ops gfs2_quotactl_ops;
extern struct shrinker gfs2_qd_shrinker;
extern struct list_lru gfs2_qd_lru;
extern void __init gfs2_quota_hash_init(void);
#endif /* __QUOTA_DOT_H__ */

611
fs/gfs2/recovery.c Normal file
View file

@ -0,0 +1,611 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "super.h"
#include "util.h"
#include "dir.h"
struct workqueue_struct *gfs_recovery_wq;
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
int new = 0;
u64 dblock;
u32 extlen;
int error;
error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
if (error)
return error;
if (!dblock) {
gfs2_consist_inode(ip);
return -EIO;
}
*bh = gfs2_meta_ra(gl, dblock, extlen);
return error;
}
int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
{
struct list_head *head = &jd->jd_revoke_list;
struct gfs2_revoke_replay *rr;
int found = 0;
list_for_each_entry(rr, head, rr_list) {
if (rr->rr_blkno == blkno) {
found = 1;
break;
}
}
if (found) {
rr->rr_where = where;
return 0;
}
rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
if (!rr)
return -ENOMEM;
rr->rr_blkno = blkno;
rr->rr_where = where;
list_add(&rr->rr_list, head);
return 1;
}
int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
{
struct gfs2_revoke_replay *rr;
int wrap, a, b, revoke;
int found = 0;
list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) {
if (rr->rr_blkno == blkno) {
found = 1;
break;
}
}
if (!found)
return 0;
wrap = (rr->rr_where < jd->jd_replay_tail);
a = (jd->jd_replay_tail < where);
b = (where < rr->rr_where);
revoke = (wrap) ? (a || b) : (a && b);
return revoke;
}
void gfs2_revoke_clean(struct gfs2_jdesc *jd)
{
struct list_head *head = &jd->jd_revoke_list;
struct gfs2_revoke_replay *rr;
while (!list_empty(head)) {
rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
list_del(&rr->rr_list);
kfree(rr);
}
}
static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
{
const struct gfs2_log_header *str = buf;
if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
return 1;
lh->lh_sequence = be64_to_cpu(str->lh_sequence);
lh->lh_flags = be32_to_cpu(str->lh_flags);
lh->lh_tail = be32_to_cpu(str->lh_tail);
lh->lh_blkno = be32_to_cpu(str->lh_blkno);
lh->lh_hash = be32_to_cpu(str->lh_hash);
return 0;
}
/**
* get_log_header - read the log header for a given segment
* @jd: the journal
* @blk: the block to look at
* @lh: the log header to return
*
* Read the log header for a given segement in a given journal. Do a few
* sanity checks on it.
*
* Returns: 0 on success,
* 1 if the header was invalid or incomplete,
* errno on error
*/
static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
struct gfs2_log_header_host *head)
{
struct buffer_head *bh;
struct gfs2_log_header_host uninitialized_var(lh);
const u32 nothing = 0;
u32 hash;
int error;
error = gfs2_replay_read_block(jd, blk, &bh);
if (error)
return error;
hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
sizeof(u32));
hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
hash ^= (u32)~0;
error = gfs2_log_header_in(&lh, bh->b_data);
brelse(bh);
if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
return 1;
*head = lh;
return 0;
}
/**
* find_good_lh - find a good log header
* @jd: the journal
* @blk: the segment to start searching from
* @lh: the log header to fill in
* @forward: if true search forward in the log, else search backward
*
* Call get_log_header() to get a log header for a segment, but if the
* segment is bad, either scan forward or backward until we find a good one.
*
* Returns: errno
*/
static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
struct gfs2_log_header_host *head)
{
unsigned int orig_blk = *blk;
int error;
for (;;) {
error = get_log_header(jd, *blk, head);
if (error <= 0)
return error;
if (++*blk == jd->jd_blocks)
*blk = 0;
if (*blk == orig_blk) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
return -EIO;
}
}
}
/**
* jhead_scan - make sure we've found the head of the log
* @jd: the journal
* @head: this is filled in with the log descriptor of the head
*
* At this point, seg and lh should be either the head of the log or just
* before. Scan forward until we find the head.
*
* Returns: errno
*/
static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
unsigned int blk = head->lh_blkno;
struct gfs2_log_header_host lh;
int error;
for (;;) {
if (++blk == jd->jd_blocks)
blk = 0;
error = get_log_header(jd, blk, &lh);
if (error < 0)
return error;
if (error == 1)
continue;
if (lh.lh_sequence == head->lh_sequence) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
return -EIO;
}
if (lh.lh_sequence < head->lh_sequence)
break;
*head = lh;
}
return 0;
}
/**
* gfs2_find_jhead - find the head of a log
* @jd: the journal
* @head: the log descriptor for the head of the log is returned here
*
* Do a binary search of a journal and find the valid log entry with the
* highest sequence number. (i.e. the log head)
*
* Returns: errno
*/
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_log_header_host lh_1, lh_m;
u32 blk_1, blk_2, blk_m;
int error;
blk_1 = 0;
blk_2 = jd->jd_blocks - 1;
for (;;) {
blk_m = (blk_1 + blk_2) / 2;
error = find_good_lh(jd, &blk_1, &lh_1);
if (error)
return error;
error = find_good_lh(jd, &blk_m, &lh_m);
if (error)
return error;
if (blk_1 == blk_m || blk_m == blk_2)
break;
if (lh_1.lh_sequence <= lh_m.lh_sequence)
blk_1 = blk_m;
else
blk_2 = blk_m;
}
error = jhead_scan(jd, &lh_1);
if (error)
return error;
*head = lh_1;
return error;
}
/**
* foreach_descriptor - go through the active part of the log
* @jd: the journal
* @start: the first log header in the active region
* @end: the last log header (don't process the contents of this entry))
*
* Call a given function once for every log descriptor in the active
* portion of the log.
*
* Returns: errno
*/
static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
unsigned int end, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
int error = 0;
u32 length;
__be64 *ptr;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
offset += sizeof(__be64) - 1;
offset &= ~(sizeof(__be64) - 1);
while (start != end) {
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
if (gfs2_meta_check(sdp, bh)) {
brelse(bh);
return -EIO;
}
ld = (struct gfs2_log_descriptor *)bh->b_data;
length = be32_to_cpu(ld->ld_length);
if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
struct gfs2_log_header_host lh;
error = get_log_header(jd, start, &lh);
if (!error) {
gfs2_replay_incr_blk(sdp, &start);
brelse(bh);
continue;
}
if (error == 1) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
error = -EIO;
}
brelse(bh);
return error;
} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
brelse(bh);
return -EIO;
}
ptr = (__be64 *)(bh->b_data + offset);
error = lops_scan_elements(jd, start, ld, ptr, pass);
if (error) {
brelse(bh);
return error;
}
while (length--)
gfs2_replay_incr_blk(sdp, &start);
brelse(bh);
}
return 0;
}
/**
* clean_journal - mark a dirty journal as being clean
* @sdp: the filesystem
* @jd: the journal
* @gl: the journal's glock
* @head: the head journal to start from
*
* Returns: errno
*/
static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
unsigned int lblock;
struct gfs2_log_header *lh;
u32 hash;
struct buffer_head *bh;
int error;
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
lblock = head->lh_blkno;
gfs2_replay_incr_blk(sdp, &lblock);
bh_map.b_size = 1 << ip->i_inode.i_blkbits;
error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
if (error)
return error;
if (!bh_map.b_blocknr) {
gfs2_consist_inode(ip);
return -EIO;
}
bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
lh = (struct gfs2_log_header *)bh->b_data;
memset(lh, 0, sizeof(struct gfs2_log_header));
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
lh->lh_header.__pad0 = cpu_to_be64(0);
lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
lh->lh_blkno = cpu_to_be32(lblock);
hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
set_buffer_dirty(bh);
if (sync_dirty_buffer(bh))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
return error;
}
static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
unsigned int message)
{
char env_jid[20];
char env_status[20];
char *envp[] = { env_jid, env_status, NULL };
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ls->ls_recover_jid_done = jid;
ls->ls_recover_jid_status = message;
sprintf(env_jid, "JID=%d", jid);
sprintf(env_status, "RECOVERY=%s",
message == LM_RD_SUCCESS ? "Done" : "Failed");
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
}
void gfs2_recover_func(struct work_struct *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host head;
struct gfs2_holder j_gh, ji_gh, thaw_gh;
unsigned long t;
int ro = 0;
unsigned int pass;
int error;
int jlocked = 0;
if (sdp->sd_args.ar_spectator ||
(jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
jd->jd_jid);
jlocked = 1;
/* Acquire the journal lock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
&j_gh);
switch (error) {
case 0:
break;
case GLR_TRYFAILED:
fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
error = 0;
default:
goto fail;
};
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
if (error)
goto fail_gunlock_j;
} else {
fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
}
fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
error = gfs2_jdesc_check(jd);
if (error)
goto fail_gunlock_ji;
error = gfs2_find_jhead(jd, &head);
if (error)
goto fail_gunlock_ji;
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
jd->jd_jid);
t = jiffies;
/* Acquire a shared hold on the freeze lock */
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | LM_FLAG_PRIORITY,
&thaw_gh);
if (error)
goto fail_gunlock_ji;
if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
ro = 1;
} else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
ro = 1;
} else {
if (sdp->sd_vfs->s_flags & MS_RDONLY) {
/* check if device itself is read-only */
ro = bdev_read_only(sdp->sd_vfs->s_bdev);
if (!ro) {
fs_info(sdp, "recovery required on "
"read-only filesystem.\n");
fs_info(sdp, "write access will be "
"enabled during recovery.\n");
}
}
}
if (ro) {
fs_warn(sdp, "jid=%u: Can't replay: read-only block "
"device\n", jd->jd_jid);
error = -EROFS;
goto fail_gunlock_thaw;
}
fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
error = foreach_descriptor(jd, head.lh_tail,
head.lh_blkno, pass);
lops_after_scan(jd, error, pass);
if (error)
goto fail_gunlock_thaw;
}
error = clean_journal(jd, &head);
if (error)
goto fail_gunlock_thaw;
gfs2_glock_dq_uninit(&thaw_gh);
t = DIV_ROUND_UP(jiffies - t, HZ);
fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
jd->jd_jid, t);
}
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
gfs2_glock_dq_uninit(&j_gh);
}
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
goto done;
fail_gunlock_thaw:
gfs2_glock_dq_uninit(&thaw_gh);
fail_gunlock_ji:
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
fail_gunlock_j:
gfs2_glock_dq_uninit(&j_gh);
}
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
jd->jd_recover_error = error;
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
done:
clear_bit(JDF_RECOVERY, &jd->jd_flags);
smp_mb__after_atomic();
wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
{
int rv;
if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
return -EBUSY;
/* we have JDF_RECOVERY, queue should always succeed */
rv = queue_work(gfs_recovery_wq, &jd->jd_work);
BUG_ON(!rv);
if (wait)
wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
TASK_UNINTERRUPTIBLE);
return wait ? jd->jd_recover_error : 0;
}

36
fs/gfs2/recovery.h Normal file
View file

@ -0,0 +1,36 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __RECOVERY_DOT_H__
#define __RECOVERY_DOT_H__
#include "incore.h"
extern struct workqueue_struct *gfs_recovery_wq;
static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
{
if (++*blk == sdp->sd_jdesc->jd_blocks)
*blk = 0;
}
extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh);
extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
extern void gfs2_recover_func(struct work_struct *work);
#endif /* __RECOVERY_DOT_H__ */

2554
fs/gfs2/rgrp.c Normal file

File diff suppressed because it is too large Load diff

84
fs/gfs2/rgrp.h Normal file
View file

@ -0,0 +1,84 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __RGRP_DOT_H__
#define __RGRP_DOT_H__
#include <linux/slab.h>
#include <linux/uaccess.h>
/* Since each block in the file system is represented by two bits in the
* bitmap, one 64-bit word in the bitmap will represent 32 blocks.
* By reserving 32 blocks at a time, we can optimize / shortcut how we search
* through the bitmaps by looking a word at a time.
*/
#define RGRP_RSRV_MINBYTES 8
#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
struct gfs2_rgrpd;
struct gfs2_sbd;
struct gfs2_holder;
extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern int gfs2_rs_alloc(struct gfs2_inode *ip);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
unsigned int type);
struct gfs2_rgrp_list {
unsigned int rl_rgrps;
unsigned int rl_space;
struct gfs2_rgrpd **rl_rgd;
struct gfs2_holder *rl_ghs;
};
extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
u64 block);
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
extern int gfs2_fitrim(struct file *filp, void __user *argp);
/* This is how to tell if a reservation is in the rgrp tree: */
static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
{
return rs && !RB_EMPTY_NODE(&rs->rs_node);
}
extern void check_and_update_goal(struct gfs2_inode *ip);
#endif /* __RGRP_DOT_H__ */

1628
fs/gfs2/super.c Normal file

File diff suppressed because it is too large Load diff

57
fs/gfs2/super.h Normal file
View file

@ -0,0 +1,57 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __SUPER_DOT_H__
#define __SUPER_DOT_H__
#include <linux/fs.h>
#include <linux/dcache.h>
#include "incore.h"
extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
unsigned int x;
spin_lock(&sdp->sd_jindex_spin);
x = sdp->sd_journals;
spin_unlock(&sdp->sd_jindex_spin);
return x;
}
extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
extern int gfs2_mount_args(struct gfs2_args *args, char *data);
extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
const void *buf);
extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
struct buffer_head *l_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
extern const struct super_operations gfs2_super_ops;
extern const struct dentry_operations gfs2_dops;
extern const struct xattr_handler *gfs2_xattr_handlers[];
#endif /* __SUPER_DOT_H__ */

711
fs/gfs2/sys.c Normal file
View file

@ -0,0 +1,711 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <asm/uaccess.h>
#include <linux/gfs2_ondisk.h>
#include <linux/genhd.h>
#include "gfs2.h"
#include "incore.h"
#include "sys.h"
#include "super.h"
#include "glock.h"
#include "quota.h"
#include "util.h"
#include "glops.h"
#include "recovery.h"
struct gfs2_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};
static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
return a->show ? a->show(sdp, buf) : 0;
}
static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
return a->store ? a->store(sdp, buf, len) : len;
}
static const struct sysfs_ops gfs2_attr_ops = {
.show = gfs2_attr_show,
.store = gfs2_attr_store,
};
static struct kset *gfs2_kset;
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u:%u\n",
MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev));
}
static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
static int gfs2_uuid_valid(const u8 *uuid)
{
int i;
for (i = 0; i < 16; i++) {
if (uuid[i])
return 1;
}
return 0;
}
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *s = sdp->sd_vfs;
const u8 *uuid = s->s_uuid;
buf[0] = '\0';
if (!gfs2_uuid_valid(uuid))
return 0;
return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
}
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *sb = sdp->sd_vfs;
int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
}
static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int error;
int n = simple_strtol(buf, NULL, 0);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (n) {
case 0:
error = thaw_super(sdp->sd_vfs);
break;
case 1:
error = freeze_super(sdp->sd_vfs);
break;
default:
return -EINVAL;
}
if (error) {
fs_warn(sdp, "freeze %d error %d", n, error);
return error;
}
return len;
}
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n");
return len;
}
static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
gfs2_statfs_sync(sdp->sd_vfs, 0);
return len;
}
static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
gfs2_quota_sync(sdp->sd_vfs, 0);
return len;
}
static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct kqid qid;
int error;
u32 id;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
id = simple_strtoul(buf, NULL, 0);
qid = make_kqid(current_user_ns(), USRQUOTA, id);
if (!qid_valid(qid))
return -EINVAL;
error = gfs2_quota_refresh(sdp, qid);
return error ? error : len;
}
static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct kqid qid;
int error;
u32 id;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
id = simple_strtoul(buf, NULL, 0);
qid = make_kqid(current_user_ns(), GRPQUOTA, id);
if (!qid_valid(qid))
return -EINVAL;
error = gfs2_quota_refresh(sdp, qid);
return error ? error : len;
}
static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
struct gfs2_glock *gl;
const struct gfs2_glock_operations *glops;
unsigned int glmode;
unsigned int gltype;
unsigned long long glnum;
char mode[16];
int rv;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
mode);
if (rv != 3)
return -EINVAL;
if (strcmp(mode, "EX") == 0)
glmode = LM_ST_UNLOCKED;
else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
glmode = LM_ST_DEFERRED;
else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
glmode = LM_ST_SHARED;
else
return -EINVAL;
if (gltype > LM_TYPE_JOURNAL)
return -EINVAL;
if (gltype == LM_TYPE_NONDISK && glnum == GFS2_FREEZE_LOCK)
glops = &gfs2_freeze_glops;
else
glops = gfs2_glops_list[gltype];
if (glops == NULL)
return -EINVAL;
if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
fs_info(sdp, "demote interface used\n");
rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
if (rv)
return rv;
gfs2_glock_cb(gl, glmode);
gfs2_glock_put(gl);
return len;
}
#define GFS2_ATTR(name, mode, show, store) \
static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
GFS2_ATTR(uuid, 0444, uuid_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
static struct attribute *gfs2_attrs[] = {
&gfs2_attr_id.attr,
&gfs2_attr_fsname.attr,
&gfs2_attr_uuid.attr,
&gfs2_attr_freeze.attr,
&gfs2_attr_withdraw.attr,
&gfs2_attr_statfs_sync.attr,
&gfs2_attr_quota_sync.attr,
&gfs2_attr_quota_refresh_user.attr,
&gfs2_attr_quota_refresh_group.attr,
&gfs2_attr_demote_rq.attr,
NULL,
};
static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
kfree(sdp);
}
static struct kobj_type gfs2_ktype = {
.release = gfs2_sbd_release,
.default_attrs = gfs2_attrs,
.sysfs_ops = &gfs2_attr_ops,
};
/*
* lock_module. Originally from lock_dlm
*/
static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
{
const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
return sprintf(buf, "%s\n", ops->lm_proto_name);
}
static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ssize_t ret;
int val = 0;
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
val = 1;
ret = sprintf(buf, "%d\n", val);
return ret;
}
static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ssize_t ret = len;
int val;
val = simple_strtol(buf, NULL, 0);
if (val == 1)
set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
else if (val == 0) {
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
smp_mb__after_atomic();
gfs2_glock_thaw(sdp);
} else {
ret = -EINVAL;
}
return ret;
}
static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
{
int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
return sprintf(buf, "%d\n", val);
}
static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
ssize_t ret = len;
int val;
val = simple_strtol(buf, NULL, 0);
if ((val == 1) &&
!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
complete(&sdp->sd_wdack);
else
ret = -EINVAL;
return ret;
}
static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_first);
}
static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
unsigned first;
int rv;
rv = sscanf(buf, "%u", &first);
if (rv != 1 || first > 1)
return -EINVAL;
rv = wait_for_completion_killable(&sdp->sd_locking_init);
if (rv)
return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EBUSY;
if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
goto out;
rv = -EINVAL;
if (sdp->sd_args.ar_spectator)
goto out;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto out;
sdp->sd_lockstruct.ls_first = first;
rv = 0;
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv ? rv : len;
}
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
}
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
{
struct gfs2_jdesc *jd;
int rv;
/* Wait for our primary journal to be initialized */
wait_for_completion(&sdp->sd_journal_ready);
spin_lock(&sdp->sd_jindex_spin);
rv = -EBUSY;
if (sdp->sd_jdesc->jd_jid == jid)
goto out;
rv = -ENOENT;
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
if (jd->jd_jid != jid)
continue;
rv = gfs2_recover_journal(jd, false);
break;
}
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv;
}
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
unsigned jid;
int rv;
rv = sscanf(buf, "%u", &jid);
if (rv != 1)
return -EINVAL;
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
rv = -ESHUTDOWN;
goto out;
}
rv = gfs2_recover_set(sdp, jid);
out:
return rv ? rv : len;
}
static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
}
static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
}
static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
{
return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid);
}
static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int jid;
int rv;
rv = sscanf(buf, "%d", &jid);
if (rv != 1)
return -EINVAL;
rv = wait_for_completion_killable(&sdp->sd_locking_init);
if (rv)
return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EINVAL;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto out;
rv = -EBUSY;
if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
goto out;
rv = 0;
if (sdp->sd_args.ar_spectator && jid > 0)
rv = jid = -EINVAL;
sdp->sd_lockstruct.ls_jid = jid;
clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
smp_mb__after_atomic();
wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv ? rv : len;
}
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
GDLM_ATTR(jid, 0644, jid_show, jid_store);
GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0600, NULL, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
static struct attribute *lock_module_attrs[] = {
&gdlm_attr_proto_name.attr,
&gdlm_attr_block.attr,
&gdlm_attr_withdraw.attr,
&gdlm_attr_jid.attr,
&gdlm_attr_first.attr,
&gdlm_attr_first_done.attr,
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
NULL,
};
/*
* get and set struct gfs2_tune fields
*/
static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u %u\n",
sdp->sd_tune.gt_quota_scale_num,
sdp->sd_tune.gt_quota_scale_den);
}
static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct gfs2_tune *gt = &sdp->sd_tune;
unsigned int x, y;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
return -EINVAL;
spin_lock(&gt->gt_spin);
gt->gt_quota_scale_num = x;
gt->gt_quota_scale_den = y;
spin_unlock(&gt->gt_spin);
return len;
}
static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
int check_zero, const char *buf, size_t len)
{
struct gfs2_tune *gt = &sdp->sd_tune;
unsigned int x;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
x = simple_strtoul(buf, NULL, 0);
if (check_zero && !x)
return -EINVAL;
spin_lock(&gt->gt_spin);
*field = x;
spin_unlock(&gt->gt_spin);
return len;
}
#define TUNE_ATTR_3(name, show, store) \
static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store)
#define TUNE_ATTR_2(name, store) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
} \
TUNE_ATTR_3(name, name##_show, store)
#define TUNE_ATTR(name, check_zero) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
} \
TUNE_ATTR_2(name, name##_store)
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
static struct attribute *tune_attrs[] = {
&tune_attr_quota_warn_period.attr,
&tune_attr_quota_quantum.attr,
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
NULL,
};
static struct attribute_group tune_group = {
.name = "tune",
.attrs = tune_attrs,
};
static struct attribute_group lock_module_group = {
.name = "lock_module",
.attrs = lock_module_attrs,
};
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
struct super_block *sb = sdp->sd_vfs;
int error;
char ro[20];
char spectator[20];
char *envp[] = { ro, spectator, NULL };
int sysfs_frees_sdp = 0;
sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
"%s", sdp->sd_table_name);
if (error)
goto fail_reg;
sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
function gfs2_sbd_release. */
error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
if (error)
goto fail_reg;
error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
if (error)
goto fail_tune;
error = sysfs_create_link(&sdp->sd_kobj,
&disk_to_dev(sb->s_bdev->bd_disk)->kobj,
"device");
if (error)
goto fail_lock_module;
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp);
return 0;
fail_lock_module:
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
free_percpu(sdp->sd_lkstats);
fs_err(sdp, "error %d adding sysfs files", error);
if (sysfs_frees_sdp)
kobject_put(&sdp->sd_kobj);
else
kfree(sdp);
sb->s_fs_info = NULL;
return error;
}
void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
{
sysfs_remove_link(&sdp->sd_kobj, "device");
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
}
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct super_block *s = sdp->sd_vfs;
const u8 *uuid = s->s_uuid;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
if (gfs2_uuid_valid(uuid))
add_uevent_var(env, "UUID=%pUB", uuid);
return 0;
}
static const struct kset_uevent_ops gfs2_uevent_ops = {
.uevent = gfs2_uevent,
};
int gfs2_sys_init(void)
{
gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
if (!gfs2_kset)
return -ENOMEM;
return 0;
}
void gfs2_sys_uninit(void)
{
kset_unregister(gfs2_kset);
}

25
fs/gfs2/sys.h Normal file
View file

@ -0,0 +1,25 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __SYS_DOT_H__
#define __SYS_DOT_H__
#include <linux/spinlock.h>
struct gfs2_sbd;
int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
int gfs2_sys_init(void);
void gfs2_sys_uninit(void);
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
#endif /* __SYS_DOT_H__ */

558
fs/gfs2/trace_gfs2.h Normal file
View file

@ -0,0 +1,558 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM gfs2
#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_GFS2_H
#include <linux/tracepoint.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/dlmconstants.h>
#include <linux/gfs2_ondisk.h>
#include <linux/writeback.h>
#include <linux/ktime.h>
#include "incore.h"
#include "glock.h"
#include "rgrp.h"
#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
#define glock_trace_name(x) __print_symbolic(x, \
dlm_state_name(IV), \
dlm_state_name(NL), \
dlm_state_name(CR), \
dlm_state_name(CW), \
dlm_state_name(PR), \
dlm_state_name(PW), \
dlm_state_name(EX))
#define block_state_name(x) __print_symbolic(x, \
{ GFS2_BLKST_FREE, "free" }, \
{ GFS2_BLKST_USED, "used" }, \
{ GFS2_BLKST_DINODE, "dinode" }, \
{ GFS2_BLKST_UNLINKED, "unlinked" })
#define TRACE_RS_DELETE 0
#define TRACE_RS_TREEDEL 1
#define TRACE_RS_INSERT 2
#define TRACE_RS_CLAIM 3
#define rs_func_name(x) __print_symbolic(x, \
{ 0, "del " }, \
{ 1, "tdel" }, \
{ 2, "ins " }, \
{ 3, "clm " })
#define show_glock_flags(flags) __print_flags(flags, "", \
{(1UL << GLF_LOCK), "l" }, \
{(1UL << GLF_DEMOTE), "D" }, \
{(1UL << GLF_PENDING_DEMOTE), "d" }, \
{(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
{(1UL << GLF_DIRTY), "y" }, \
{(1UL << GLF_LFLUSH), "f" }, \
{(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
{(1UL << GLF_REPLY_PENDING), "r" }, \
{(1UL << GLF_INITIAL), "I" }, \
{(1UL << GLF_FROZEN), "F" }, \
{(1UL << GLF_QUEUED), "q" }, \
{(1UL << GLF_LRU), "L" }, \
{(1UL << GLF_OBJECT), "o" }, \
{(1UL << GLF_BLOCKING), "b" })
#ifndef NUMPTY
#define NUMPTY
static inline u8 glock_trace_state(unsigned int state)
{
switch(state) {
case LM_ST_SHARED:
return DLM_LOCK_PR;
case LM_ST_DEFERRED:
return DLM_LOCK_CW;
case LM_ST_EXCLUSIVE:
return DLM_LOCK_EX;
}
return DLM_LOCK_NL;
}
#endif
/* Section 1 - Locking
*
* Objectives:
* Latency: Remote demote request to state change
* Latency: Local lock request to state change
* Latency: State change to lock grant
* Correctness: Ordering of local lock state vs. I/O requests
* Correctness: Responses to remote demote requests
*/
/* General glock state change (DLM lock request completes) */
TRACE_EVENT(gfs2_glock_state_change,
TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
TP_ARGS(gl, new_state),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( u8, cur_state )
__field( u8, new_state )
__field( u8, dmt_state )
__field( u8, tgt_state )
__field( unsigned long, flags )
),
TP_fast_assign(
__entry->dev = gl->gl_sbd->sd_vfs->s_dev;
__entry->glnum = gl->gl_name.ln_number;
__entry->gltype = gl->gl_name.ln_type;
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->new_state = glock_trace_state(new_state);
__entry->tgt_state = glock_trace_state(gl->gl_target);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
__entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
),
TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
glock_trace_name(__entry->cur_state),
glock_trace_name(__entry->new_state),
glock_trace_name(__entry->tgt_state),
glock_trace_name(__entry->dmt_state),
show_glock_flags(__entry->flags))
);
/* State change -> unlocked, glock is being deallocated */
TRACE_EVENT(gfs2_glock_put,
TP_PROTO(const struct gfs2_glock *gl),
TP_ARGS(gl),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( u8, cur_state )
__field( unsigned long, flags )
),
TP_fast_assign(
__entry->dev = gl->gl_sbd->sd_vfs->s_dev;
__entry->gltype = gl->gl_name.ln_type;
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
),
TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->gltype, (unsigned long long)__entry->glnum,
glock_trace_name(__entry->cur_state),
glock_trace_name(DLM_LOCK_IV),
show_glock_flags(__entry->flags))
);
/* Callback (local or remote) requesting lock demotion */
TRACE_EVENT(gfs2_demote_rq,
TP_PROTO(const struct gfs2_glock *gl, bool remote),
TP_ARGS(gl, remote),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( u8, cur_state )
__field( u8, dmt_state )
__field( unsigned long, flags )
__field( bool, remote )
),
TP_fast_assign(
__entry->dev = gl->gl_sbd->sd_vfs->s_dev;
__entry->gltype = gl->gl_name.ln_type;
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
__entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
__entry->remote = remote;
),
TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
glock_trace_name(__entry->cur_state),
glock_trace_name(__entry->dmt_state),
show_glock_flags(__entry->flags),
__entry->remote ? "remote" : "local")
);
/* Promotion/grant of a glock */
TRACE_EVENT(gfs2_promote,
TP_PROTO(const struct gfs2_holder *gh, int first),
TP_ARGS(gh, first),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( int, first )
__field( u8, state )
),
TP_fast_assign(
__entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
__entry->glnum = gh->gh_gl->gl_name.ln_number;
__entry->gltype = gh->gh_gl->gl_name.ln_type;
__entry->first = first;
__entry->state = glock_trace_state(gh->gh_state);
),
TP_printk("%u,%u glock %u:%llu promote %s %s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
__entry->first ? "first": "other",
glock_trace_name(__entry->state))
);
/* Queue/dequeue a lock request */
TRACE_EVENT(gfs2_glock_queue,
TP_PROTO(const struct gfs2_holder *gh, int queue),
TP_ARGS(gh, queue),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( int, queue )
__field( u8, state )
),
TP_fast_assign(
__entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
__entry->glnum = gh->gh_gl->gl_name.ln_number;
__entry->gltype = gh->gh_gl->gl_name.ln_type;
__entry->queue = queue;
__entry->state = glock_trace_state(gh->gh_state);
),
TP_printk("%u,%u glock %u:%llu %squeue %s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
__entry->queue ? "" : "de",
glock_trace_name(__entry->state))
);
/* DLM sends a reply to GFS2 */
TRACE_EVENT(gfs2_glock_lock_time,
TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
TP_ARGS(gl, tdiff),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, glnum )
__field( u32, gltype )
__field( int, status )
__field( char, flags )
__field( s64, tdiff )
__field( s64, srtt )
__field( s64, srttvar )
__field( s64, srttb )
__field( s64, srttvarb )
__field( s64, sirt )
__field( s64, sirtvar )
__field( s64, dcount )
__field( s64, qcount )
),
TP_fast_assign(
__entry->dev = gl->gl_sbd->sd_vfs->s_dev;
__entry->glnum = gl->gl_name.ln_number;
__entry->gltype = gl->gl_name.ln_type;
__entry->status = gl->gl_lksb.sb_status;
__entry->flags = gl->gl_lksb.sb_flags;
__entry->tdiff = tdiff;
__entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT];
__entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
__entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
__entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
__entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT];
__entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
__entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
__entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT];
),
TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
__entry->status, __entry->flags,
(long long)__entry->tdiff,
(long long)__entry->srtt,
(long long)__entry->srttvar,
(long long)__entry->srttb,
(long long)__entry->srttvarb,
(long long)__entry->sirt,
(long long)__entry->sirtvar,
(long long)__entry->dcount,
(long long)__entry->qcount)
);
/* Section 2 - Log/journal
*
* Objectives:
* Latency: Log flush time
* Correctness: pin/unpin vs. disk I/O ordering
* Performance: Log usage stats
*/
/* Pin/unpin a block in the log */
TRACE_EVENT(gfs2_pin,
TP_PROTO(const struct gfs2_bufdata *bd, int pin),
TP_ARGS(bd, pin),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, pin )
__field( u32, len )
__field( sector_t, block )
__field( u64, ino )
),
TP_fast_assign(
__entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev;
__entry->pin = pin;
__entry->len = bd->bd_bh->b_size;
__entry->block = bd->bd_bh->b_blocknr;
__entry->ino = bd->bd_gl->gl_name.ln_number;
),
TP_printk("%u,%u log %s %llu/%lu inode %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->pin ? "pin" : "unpin",
(unsigned long long)__entry->block,
(unsigned long)__entry->len,
(unsigned long long)__entry->ino)
);
/* Flushing the log */
TRACE_EVENT(gfs2_log_flush,
TP_PROTO(const struct gfs2_sbd *sdp, int start),
TP_ARGS(sdp, start),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, start )
__field( u64, log_seq )
),
TP_fast_assign(
__entry->dev = sdp->sd_vfs->s_dev;
__entry->start = start;
__entry->log_seq = sdp->sd_log_sequence;
),
TP_printk("%u,%u log flush %s %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->start ? "start" : "end",
(unsigned long long)__entry->log_seq)
);
/* Reserving/releasing blocks in the log */
TRACE_EVENT(gfs2_log_blocks,
TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
TP_ARGS(sdp, blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, blocks )
),
TP_fast_assign(
__entry->dev = sdp->sd_vfs->s_dev;
__entry->blocks = blocks;
),
TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->blocks)
);
/* Writing back the AIL */
TRACE_EVENT(gfs2_ail_flush,
TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
TP_ARGS(sdp, wbc, start),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, start )
__field( int, sync_mode )
__field( long, nr_to_write )
),
TP_fast_assign(
__entry->dev = sdp->sd_vfs->s_dev;
__entry->start = start;
__entry->sync_mode = wbc->sync_mode;
__entry->nr_to_write = wbc->nr_to_write;
),
TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->start ? "start" : "end",
__entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
__entry->nr_to_write)
);
/* Section 3 - bmap
*
* Objectives:
* Latency: Bmap request time
* Performance: Block allocator tracing
* Correctness: Test of disard generation vs. blocks allocated
*/
/* Map an extent of blocks, possibly a new allocation */
TRACE_EVENT(gfs2_bmap,
TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
sector_t lblock, int create, int errno),
TP_ARGS(ip, bh, lblock, create, errno),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( sector_t, lblock )
__field( sector_t, pblock )
__field( u64, inum )
__field( unsigned long, state )
__field( u32, len )
__field( int, create )
__field( int, errno )
),
TP_fast_assign(
__entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
__entry->lblock = lblock;
__entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
__entry->inum = ip->i_no_addr;
__entry->state = bh->b_state;
__entry->len = bh->b_size;
__entry->create = create;
__entry->errno = errno;
),
TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->lblock,
(unsigned long)__entry->len,
(unsigned long long)__entry->pblock,
__entry->state, __entry->create ? "create " : "nocreate",
__entry->errno)
);
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,
TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 block, unsigned len, u8 block_state),
TP_ARGS(ip, rgd, block, len, block_state),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, start )
__field( u64, inum )
__field( u32, len )
__field( u8, block_state )
__field( u64, rd_addr )
__field( u32, rd_free_clone )
__field( u32, rd_reserved )
),
TP_fast_assign(
__entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev;
__entry->start = block;
__entry->inum = ip->i_no_addr;
__entry->len = len;
__entry->block_state = block_state;
__entry->rd_addr = rgd->rd_addr;
__entry->rd_free_clone = rgd->rd_free_clone;
__entry->rd_reserved = rgd->rd_reserved;
),
TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
block_state_name(__entry->block_state),
(unsigned long long)__entry->rd_addr,
__entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
);
/* Keep track of multi-block reservations as they are allocated/freed */
TRACE_EVENT(gfs2_rs,
TP_PROTO(const struct gfs2_blkreserv *rs, u8 func),
TP_ARGS(rs, func),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, rd_addr )
__field( u32, rd_free_clone )
__field( u32, rd_reserved )
__field( u64, inum )
__field( u64, start )
__field( u32, free )
__field( u8, func )
),
TP_fast_assign(
__entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
__entry->rd_addr = rs->rs_rbm.rgd->rd_addr;
__entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone;
__entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved;
__entry->inum = rs->rs_inum;
__entry->start = gfs2_rbm_to_block(&rs->rs_rbm);
__entry->free = rs->rs_free;
__entry->func = func;
),
TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long long)__entry->rd_addr,
(unsigned long)__entry->rd_free_clone,
(unsigned long)__entry->rd_reserved,
rs_func_name(__entry->func), (unsigned long)__entry->free)
);
#endif /* _TRACE_GFS2_H */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_gfs2
#include <trace/define_trace.h>

270
fs/gfs2/trans.c Normal file
View file

@ -0,0 +1,270 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"
int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
unsigned int revokes)
{
struct gfs2_trans *tr;
int error;
BUG_ON(current->journal_info);
BUG_ON(blocks == 0 && revokes == 0);
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
return -EROFS;
tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
if (!tr)
return -ENOMEM;
tr->tr_ip = _RET_IP_;
tr->tr_blocks = blocks;
tr->tr_revokes = revokes;
tr->tr_reserved = 1;
tr->tr_alloced = 1;
if (blocks)
tr->tr_reserved += 6 + blocks;
if (revokes)
tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
sizeof(u64));
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);
sb_start_intwrite(sdp->sd_vfs);
error = gfs2_log_reserve(sdp, tr->tr_reserved);
if (error)
goto fail;
current->journal_info = tr;
return 0;
fail:
sb_end_intwrite(sdp->sd_vfs);
kfree(tr);
return error;
}
static void gfs2_print_trans(const struct gfs2_trans *tr)
{
pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip);
pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n",
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched);
pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
tr->tr_num_revoke, tr->tr_num_revoke_rm);
}
void gfs2_trans_end(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr = current->journal_info;
s64 nbuf;
BUG_ON(!tr);
current->journal_info = NULL;
if (!tr->tr_touched) {
gfs2_log_release(sdp, tr->tr_reserved);
if (tr->tr_alloced)
kfree(tr);
sb_end_intwrite(sdp->sd_vfs);
return;
}
nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
nbuf -= tr->tr_num_buf_rm;
nbuf -= tr->tr_num_databuf_rm;
if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
(tr->tr_num_revoke <= tr->tr_revokes)))
gfs2_print_trans(tr);
gfs2_log_commit(sdp, tr);
if (tr->tr_alloced && !tr->tr_attached)
kfree(tr);
up_read(&sdp->sd_log_flush_lock);
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
sb_end_intwrite(sdp->sd_vfs);
}
static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
struct buffer_head *bh,
const struct gfs2_log_operations *lops)
{
struct gfs2_bufdata *bd;
bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
bd->bd_bh = bh;
bd->bd_gl = gl;
bd->bd_ops = lops;
INIT_LIST_HEAD(&bd->bd_list);
bh->b_private = bd;
return bd;
}
/**
* gfs2_trans_add_data - Add a databuf to the transaction.
* @gl: The inode glock associated with the buffer
* @bh: The buffer to add
*
* This is used in two distinct cases:
* i) In ordered write mode
* We put the data buffer on a list so that we can ensure that its
* synced to disk at the right time
* ii) In journaled data mode
* We need to journal the data block in the same way as metadata in
* the functions above. The difference is that here we have a tag
* which is two __be64's being the block number (as per meta data)
* and a flag which says whether the data block needs escaping or
* not. This means we need a new log entry for each 251 or so data
* blocks, which isn't an enormous overhead but twice as much as
* for normal metadata blocks.
*/
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
struct gfs2_bufdata *bd;
if (!gfs2_is_jdata(ip)) {
gfs2_ordered_add_inode(ip);
return;
}
lock_buffer(bh);
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd == NULL) {
gfs2_log_unlock(sdp);
unlock_buffer(bh);
if (bh->b_private == NULL)
bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
lock_buffer(bh);
gfs2_log_lock(sdp);
}
gfs2_assert(sdp, bd->bd_gl == gl);
tr->tr_touched = 1;
if (list_empty(&bd->bd_list)) {
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_databuf_new++;
list_add_tail(&bd->bd_list, &tr->tr_databuf);
}
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct gfs2_meta_header *mh;
struct gfs2_trans *tr;
tr = current->journal_info;
tr->tr_touched = 1;
if (!list_empty(&bd->bd_list))
return;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n",
(unsigned long long)bd->bd_bh->b_blocknr);
BUG();
}
gfs2_pin(sdp, bd->bd_bh);
mh->__pad0 = cpu_to_be64(0);
mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
list_add(&bd->bd_list, &tr->tr_buf);
tr->tr_num_buf_new++;
}
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_bufdata *bd;
lock_buffer(bh);
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd == NULL) {
gfs2_log_unlock(sdp);
unlock_buffer(bh);
lock_page(bh->b_page);
if (bh->b_private == NULL)
bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
unlock_page(bh->b_page);
lock_buffer(bh);
gfs2_log_lock(sdp);
}
gfs2_assert(sdp, bd->bd_gl == gl);
meta_lo_add(sdp, bd);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct gfs2_trans *tr = current->journal_info;
BUG_ON(!list_empty(&bd->bd_list));
gfs2_add_revoke(sdp, bd);
tr->tr_touched = 1;
tr->tr_num_revoke++;
}
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
{
struct gfs2_bufdata *bd, *tmp;
struct gfs2_trans *tr = current->journal_info;
unsigned int n = len;
gfs2_log_lock(sdp);
list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_list) {
if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
list_del_init(&bd->bd_list);
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
kmem_cache_free(gfs2_bufdata_cachep, bd);
tr->tr_num_revoke_rm++;
if (--n == 0)
break;
}
}
gfs2_log_unlock(sdp);
}

47
fs/gfs2/trans.h Normal file
View file

@ -0,0 +1,47 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __TRANS_DOT_H__
#define __TRANS_DOT_H__
#include <linux/buffer_head.h>
struct gfs2_sbd;
struct gfs2_rgrpd;
struct gfs2_glock;
#define RES_DINODE 1
#define RES_INDIRECT 1
#define RES_JDATA 1
#define RES_DATA 1
#define RES_LEAF 1
#define RES_RG_HDR 1
#define RES_RG_BIT 2
#define RES_EATTR 1
#define RES_STATFS 1
#define RES_QUOTA 2
/* reserve either the number of blocks to be allocated plus the rg header
* block, or all of the blocks in the rg, whichever is smaller */
static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
{
if (requested < ip->i_rgd->rd_length)
return requested + 1;
return ip->i_rgd->rd_length;
}
extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
unsigned int revokes);
extern void gfs2_trans_end(struct gfs2_sbd *sdp);
extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
#endif /* __TRANS_DOT_H__ */

265
fs/gfs2/util.c Normal file
View file

@ -0,0 +1,265 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "util.h"
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_rsrv_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
fs_emerg(sdp, "fatal assertion failed\n");
}
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
const struct lm_lockops *lm = ls->ls_ops;
va_list args;
struct va_format vaf;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return 0;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
fs_err(sdp, "%pV", &vaf);
va_end(args);
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
wait_for_completion(&sdp->sd_wdack);
if (lm->lm_unmount) {
fs_err(sdp, "telling LM to unmount\n");
lm->lm_unmount(sdp);
}
fs_err(sdp, "withdrawn\n");
dump_stack();
}
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
return -1;
}
/**
* gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
* Returns: -1 if this call withdrew the machine,
* -2 if it was already withdrawn
*/
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line)
{
int me;
me = gfs2_lm_withdraw(sdp,
"fatal: assertion \"%s\" failed\n"
" function = %s, file = %s, line = %u\n",
assertion, function, file, line);
dump_stack();
return (me) ? -1 : -2;
}
/**
* gfs2_assert_warn_i - Print a message to the console if @assertion is false
* Returns: -1 if we printed something
* -2 if we didn't
*/
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line)
{
if (time_before(jiffies,
sdp->sd_last_warning +
gfs2_tune_get(sdp, gt_complain_secs) * HZ))
return -2;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
assertion, function, file, line);
if (sdp->sd_args.ar_debug)
BUG();
else
dump_stack();
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
sdp->sd_fsname, assertion,
sdp->sd_fsname, function, file, line);
sdp->sd_last_warning = jiffies;
return -1;
}
/**
* gfs2_consist_i - Flag a filesystem consistency error and withdraw
* Returns: -1 if this call withdrew the machine,
* 0 if it was already withdrawn
*/
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
char *file, unsigned int line)
{
int rv;
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
function, file, line);
return rv;
}
/**
* gfs2_consist_inode_i - Flag an inode consistency error and withdraw
* Returns: -1 if this call withdrew the machine,
* 0 if it was already withdrawn
*/
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int rv;
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error\n"
" inode = %llu %llu\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
function, file, line);
return rv;
}
/**
* gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
* Returns: -1 if this call withdrew the machine,
* 0 if it was already withdrawn
*/
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
int rv;
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)rgd->rd_addr,
function, file, line);
return rv;
}
/**
* gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
* Returns: -1 if this call withdrew the machine,
* -2 if it was already withdrawn
*/
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *type, const char *function, char *file,
unsigned int line)
{
int me;
me = gfs2_lm_withdraw(sdp,
"fatal: invalid metadata block\n"
" bh = %llu (%s)\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr, type,
function, file, line);
return (me) ? -1 : -2;
}
/**
* gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
* Returns: -1 if this call withdrew the machine,
* -2 if it was already withdrawn
*/
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
u16 type, u16 t, const char *function,
char *file, unsigned int line)
{
int me;
me = gfs2_lm_withdraw(sdp,
"fatal: invalid metadata block\n"
" bh = %llu (type: exp=%u, found=%u)\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr, type, t,
function, file, line);
return (me) ? -1 : -2;
}
/**
* gfs2_io_error_i - Flag an I/O error and withdraw
* Returns: -1 if this call withdrew the machine,
* 0 if it was already withdrawn
*/
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
unsigned int line)
{
int rv;
rv = gfs2_lm_withdraw(sdp,
"fatal: I/O error\n"
" function = %s, file = %s, line = %u\n",
function, file, line);
return rv;
}
/**
* gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
* Returns: -1 if this call withdrew the machine,
* 0 if it was already withdrawn
*/
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line)
{
int rv;
rv = gfs2_lm_withdraw(sdp,
"fatal: I/O error\n"
" block = %llu\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr,
function, file, line);
return rv;
}

171
fs/gfs2/util.h Normal file
View file

@ -0,0 +1,171 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__
#ifdef pr_fmt
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#endif
#include <linux/mempool.h>
#include "incore.h"
#define fs_emerg(fs, fmt, ...) \
pr_emerg("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__)
#define fs_warn(fs, fmt, ...) \
pr_warn("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__)
#define fs_err(fs, fmt, ...) \
pr_err("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__)
#define fs_info(fs, fmt, ...) \
pr_info("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__)
void gfs2_assert_i(struct gfs2_sbd *sdp);
#define gfs2_assert(sdp, assertion) \
do { \
if (unlikely(!(assertion))) { \
gfs2_assert_i(sdp); \
BUG(); \
} \
} while (0)
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line);
#define gfs2_assert_withdraw(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
__func__, __FILE__, __LINE__))
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line);
#define gfs2_assert_warn(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
__func__, __FILE__, __LINE__))
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist(sdp) \
gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_inode(ip) \
gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_rgrpd(rgd) \
gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *type, const char *function,
char *file, unsigned int line);
static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
struct buffer_head *bh)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
u32 magic = be32_to_cpu(mh->mh_magic);
if (unlikely(magic != GFS2_MAGIC)) {
pr_err("Magic number missing at %llu\n",
(unsigned long long)bh->b_blocknr);
return -EIO;
}
return 0;
}
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
u16 type, u16 t,
const char *function,
char *file, unsigned int line);
static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
struct buffer_head *bh,
u16 type,
const char *function,
char *file, unsigned int line)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
u32 magic = be32_to_cpu(mh->mh_magic);
u16 t = be32_to_cpu(mh->mh_type);
if (unlikely(magic != GFS2_MAGIC))
return gfs2_meta_check_ii(sdp, bh, "magic number", function,
file, line);
if (unlikely(t != type))
return gfs2_metatype_check_ii(sdp, bh, type, t, function,
file, line);
return 0;
}
#define gfs2_metatype_check(sdp, bh, type) \
gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
u16 format)
{
struct gfs2_meta_header *mh;
mh = (struct gfs2_meta_header *)bh->b_data;
mh->mh_type = cpu_to_be32(type);
mh->mh_format = cpu_to_be32(format);
}
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
char *file, unsigned int line);
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line);
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
extern struct kmem_cache *gfs2_glock_cachep;
extern struct kmem_cache *gfs2_glock_aspace_cachep;
extern struct kmem_cache *gfs2_inode_cachep;
extern struct kmem_cache *gfs2_bufdata_cachep;
extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
extern struct kmem_cache *gfs2_rsrv_cachep;
extern mempool_t *gfs2_page_pool;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
unsigned int *p)
{
unsigned int x;
spin_lock(&gt->gt_spin);
x = *p;
spin_unlock(&gt->gt_spin);
return x;
}
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
__printf(2, 3)
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...);
#endif /* __UTIL_DOT_H__ */

1508
fs/gfs2/xattr.c Normal file

File diff suppressed because it is too large Load diff

67
fs/gfs2/xattr.h Normal file
View file

@ -0,0 +1,67 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __EATTR_DOT_H__
#define __EATTR_DOT_H__
struct gfs2_inode;
struct iattr;
#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
(sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
#define GFS2_EA2DATAPTRS(ea) \
((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
struct gfs2_ea_request {
const char *er_name;
char *er_data;
unsigned int er_name_len;
unsigned int er_data_len;
unsigned int er_type; /* GFS2_EATYPE_... */
};
struct gfs2_ea_location {
struct buffer_head *el_bh;
struct gfs2_ea_header *el_ea;
struct gfs2_ea_header *el_prev;
};
extern int __gfs2_xattr_set(struct inode *inode, const char *name,
const void *value, size_t size,
int flags, int type);
extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
#endif /* __EATTR_DOT_H__ */