Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

16
fs/dlm/Kconfig Normal file
View file

@ -0,0 +1,16 @@
menuconfig DLM
tristate "Distributed Lock Manager (DLM)"
depends on INET
depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
select IP_SCTP
help
A general purpose distributed lock manager for kernel or userspace
applications.
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
Under the debugfs mount point, the name of each lockspace will
appear as a file in the "dlm" directory. The output is the
list of resource and locks the local node knows about.

21
fs/dlm/Makefile Normal file
View file

@ -0,0 +1,21 @@
obj-$(CONFIG_DLM) += dlm.o
dlm-y := ast.o \
config.o \
dir.o \
lock.o \
lockspace.o \
main.o \
member.o \
memory.o \
midcomms.o \
netlink.o \
lowcomms.o \
plock.o \
rcom.o \
recover.o \
recoverd.o \
requestqueue.o \
user.o \
util.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o

314
fs/dlm/ast.c Normal file
View file

@ -0,0 +1,314 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
#include "ast.h"
static uint64_t dlm_cb_seq;
static DEFINE_SPINLOCK(dlm_cb_seq_spin);
static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
{
int i;
log_print("last_bast %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_last_bast.seq,
lkb->lkb_last_bast.flags,
lkb->lkb_last_bast.mode,
lkb->lkb_last_bast.sb_status,
lkb->lkb_last_bast.sb_flags);
log_print("last_cast %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.flags,
lkb->lkb_last_cast.mode,
lkb->lkb_last_cast.sb_status,
lkb->lkb_last_cast.sb_flags);
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
log_print("cb %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_callbacks[i].seq,
lkb->lkb_callbacks[i].flags,
lkb->lkb_callbacks[i].mode,
lkb->lkb_callbacks[i].sb_status,
lkb->lkb_callbacks[i].sb_flags);
}
}
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t prev_seq;
int prev_mode;
int i, rv;
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (lkb->lkb_callbacks[i].seq)
continue;
/*
* Suppress some redundant basts here, do more on removal.
* Don't even add a bast if the callback just before it
* is a bast for the same mode or a more restrictive mode.
* (the addional > PR check is needed for PR/CW inversion)
*/
if ((i > 0) && (flags & DLM_CB_BAST) &&
(lkb->lkb_callbacks[i-1].flags & DLM_CB_BAST)) {
prev_seq = lkb->lkb_callbacks[i-1].seq;
prev_mode = lkb->lkb_callbacks[i-1].mode;
if ((prev_mode == mode) ||
(prev_mode > mode && prev_mode > DLM_LOCK_PR)) {
log_debug(ls, "skip %x add bast %llu mode %d "
"for bast %llu mode %d",
lkb->lkb_id,
(unsigned long long)seq,
mode,
(unsigned long long)prev_seq,
prev_mode);
rv = 0;
goto out;
}
}
lkb->lkb_callbacks[i].seq = seq;
lkb->lkb_callbacks[i].flags = flags;
lkb->lkb_callbacks[i].mode = mode;
lkb->lkb_callbacks[i].sb_status = status;
lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
rv = 0;
break;
}
if (i == DLM_CALLBACKS_SIZE) {
log_error(ls, "no callbacks %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id, (unsigned long long)seq,
flags, mode, status, sbflags);
dlm_dump_lkb_callbacks(lkb);
rv = -1;
goto out;
}
out:
return rv;
}
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid)
{
int i, rv;
*resid = 0;
if (!lkb->lkb_callbacks[0].seq) {
rv = -ENOENT;
goto out;
}
/* oldest undelivered cb is callbacks[0] */
memcpy(cb, &lkb->lkb_callbacks[0], sizeof(struct dlm_callback));
memset(&lkb->lkb_callbacks[0], 0, sizeof(struct dlm_callback));
/* shift others down */
for (i = 1; i < DLM_CALLBACKS_SIZE; i++) {
if (!lkb->lkb_callbacks[i].seq)
break;
memcpy(&lkb->lkb_callbacks[i-1], &lkb->lkb_callbacks[i],
sizeof(struct dlm_callback));
memset(&lkb->lkb_callbacks[i], 0, sizeof(struct dlm_callback));
(*resid)++;
}
/* if cb is a bast, it should be skipped if the blocking mode is
compatible with the last granted mode */
if ((cb->flags & DLM_CB_BAST) && lkb->lkb_last_cast.seq) {
if (dlm_modes_compat(cb->mode, lkb->lkb_last_cast.mode)) {
cb->flags |= DLM_CB_SKIP;
log_debug(ls, "skip %x bast %llu mode %d "
"for cast %llu mode %d",
lkb->lkb_id,
(unsigned long long)cb->seq,
cb->mode,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.mode);
rv = 0;
goto out;
}
}
if (cb->flags & DLM_CB_CAST) {
memcpy(&lkb->lkb_last_cast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_cast_time = ktime_get();
}
if (cb->flags & DLM_CB_BAST) {
memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_bast_time = ktime_get();
}
rv = 0;
out:
return rv;
}
void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
uint32_t sbflags)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t new_seq, prev_seq;
int rv;
spin_lock(&dlm_cb_seq_spin);
new_seq = ++dlm_cb_seq;
spin_unlock(&dlm_cb_seq_spin);
if (lkb->lkb_flags & DLM_IFL_USER) {
dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
return;
}
mutex_lock(&lkb->lkb_cb_mutex);
prev_seq = lkb->lkb_callbacks[0].seq;
rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
if (rv < 0)
goto out;
if (!prev_seq) {
kref_get(&lkb->lkb_ref);
if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
mutex_lock(&ls->ls_cb_mutex);
list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
mutex_unlock(&ls->ls_cb_mutex);
} else {
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
}
}
out:
mutex_unlock(&lkb->lkb_cb_mutex);
}
void dlm_callback_work(struct work_struct *work)
{
struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
void (*castfn) (void *astparam);
void (*bastfn) (void *astparam, int mode);
struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
int i, rv, resid;
memset(&callbacks, 0, sizeof(callbacks));
mutex_lock(&lkb->lkb_cb_mutex);
if (!lkb->lkb_callbacks[0].seq) {
/* no callback work exists, shouldn't happen */
log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
dlm_print_lkb(lkb);
dlm_dump_lkb_callbacks(lkb);
}
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
if (rv < 0)
break;
}
if (resid) {
/* cbs remain, loop should have removed all, shouldn't happen */
log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
resid);
dlm_print_lkb(lkb);
dlm_dump_lkb_callbacks(lkb);
}
mutex_unlock(&lkb->lkb_cb_mutex);
castfn = lkb->lkb_astfn;
bastfn = lkb->lkb_bastfn;
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (!callbacks[i].seq)
break;
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
castfn(lkb->lkb_astparam);
}
}
/* undo kref_get from dlm_add_callback, may cause lkb to be freed */
dlm_put_lkb(lkb);
}
int dlm_callback_start(struct dlm_ls *ls)
{
ls->ls_callback_wq = alloc_workqueue("dlm_callback",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!ls->ls_callback_wq) {
log_print("can't start dlm_callback workqueue");
return -ENOMEM;
}
return 0;
}
void dlm_callback_stop(struct dlm_ls *ls)
{
if (ls->ls_callback_wq)
destroy_workqueue(ls->ls_callback_wq);
}
void dlm_callback_suspend(struct dlm_ls *ls)
{
set_bit(LSFL_CB_DELAY, &ls->ls_flags);
if (ls->ls_callback_wq)
flush_workqueue(ls->ls_callback_wq);
}
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
int count = 0;
clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
if (!ls->ls_callback_wq)
return;
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_rinfo(ls, "dlm_callback_resume %d", count);
}

32
fs/dlm/ast.h Normal file
View file

@ -0,0 +1,32 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __ASTD_DOT_H__
#define __ASTD_DOT_H__
void dlm_del_ast(struct dlm_lkb *lkb);
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq);
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid);
void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
uint32_t sbflags);
void dlm_callback_work(struct work_struct *work);
int dlm_callback_start(struct dlm_ls *ls);
void dlm_callback_stop(struct dlm_ls *ls);
void dlm_callback_suspend(struct dlm_ls *ls);
void dlm_callback_resume(struct dlm_ls *ls);
#endif

1040
fs/dlm/config.c Normal file

File diff suppressed because it is too large Load diff

53
fs/dlm/config.h Normal file
View file

@ -0,0 +1,53 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
struct dlm_config_node {
int nodeid;
int weight;
int new;
uint32_t comm_seq;
};
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
int ci_recover_timer;
int ci_toss_secs;
int ci_scan_secs;
int ci_log_debug;
int ci_protocol;
int ci_timewarn_cs;
int ci_waitwarn_us;
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];
};
extern struct dlm_config_info dlm_config;
int dlm_config_init(void);
void dlm_config_exit(void);
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out);
int dlm_comm_seq(int nodeid, uint32_t *seq);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);
#endif /* __CONFIG_DOT_H__ */

810
fs/dlm/debug_fs.c Normal file
View file

@ -0,0 +1,810 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include "dlm_internal.h"
#include "lock.h"
#define DLM_DEBUG_BUF_LEN 4096
static char debug_buf[DLM_DEBUG_BUF_LEN];
static struct mutex debug_buf_lock;
static struct dentry *dlm_root;
static char *print_lockmode(int mode)
{
switch (mode) {
case DLM_LOCK_IV:
return "--";
case DLM_LOCK_NL:
return "NL";
case DLM_LOCK_CR:
return "CR";
case DLM_LOCK_CW:
return "CW";
case DLM_LOCK_PR:
return "PR";
case DLM_LOCK_PW:
return "PW";
case DLM_LOCK_EX:
return "EX";
default:
return "??";
}
}
static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
struct dlm_rsb *res)
{
seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
if (lkb->lkb_status == DLM_LKSTS_CONVERT ||
lkb->lkb_status == DLM_LKSTS_WAITING)
seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_nodeid) {
if (lkb->lkb_nodeid != res->res_nodeid)
seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid,
lkb->lkb_remid);
else
seq_printf(s, " Master: %08x", lkb->lkb_remid);
}
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
return seq_puts(s, "\n");
}
static int print_format1(struct dlm_rsb *res, struct seq_file *s)
{
struct dlm_lkb *lkb;
int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
int rv;
lock_rsb(res);
rv = seq_printf(s, "\nResource %p Name (len=%d) \"",
res, res->res_length);
if (rv)
goto out;
for (i = 0; i < res->res_length; i++) {
if (isprint(res->res_name[i]))
seq_printf(s, "%c", res->res_name[i]);
else
seq_printf(s, "%c", '.');
}
if (res->res_nodeid > 0)
rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
res->res_nodeid);
else if (res->res_nodeid == 0)
rv = seq_puts(s, "\"\nMaster Copy\n");
else if (res->res_nodeid == -1)
rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n",
res->res_first_lkid);
else
rv = seq_printf(s, "\"\nInvalid master %d\n",
res->res_nodeid);
if (rv)
goto out;
/* Print the LVB: */
if (res->res_lvbptr) {
seq_puts(s, "LVB: ");
for (i = 0; i < lvblen; i++) {
if (i == lvblen / 2)
seq_puts(s, "\n ");
seq_printf(s, "%02x ",
(unsigned char) res->res_lvbptr[i]);
}
if (rsb_flag(res, RSB_VALNOTVALID))
seq_puts(s, " (INVALID)");
rv = seq_puts(s, "\n");
if (rv)
goto out;
}
root_list = !list_empty(&res->res_root_list);
recover_list = !list_empty(&res->res_recover_list);
if (root_list || recover_list) {
rv = seq_printf(s, "Recovery: root %d recover %d flags %lx "
"count %d\n", root_list, recover_list,
res->res_flags, res->res_recover_locks_count);
if (rv)
goto out;
}
/* Print the locks attached to this resource */
seq_puts(s, "Granted Queue\n");
list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
goto out;
}
seq_puts(s, "Conversion Queue\n");
list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
goto out;
}
seq_puts(s, "Waiting Queue\n");
list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
goto out;
}
if (list_empty(&res->res_lookup))
goto out;
seq_puts(s, "Lookup Queue\n");
list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
rv = seq_printf(s, "%08x %s", lkb->lkb_id,
print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
rv = seq_puts(s, "\n");
}
out:
unlock_rsb(res);
return rv;
}
static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
struct dlm_rsb *r)
{
u64 xid = 0;
u64 us;
int rv;
if (lkb->lkb_flags & DLM_IFL_USER) {
if (lkb->lkb_ua)
xid = lkb->lkb_ua->xid;
}
/* microseconds since lkb was added to current queue */
us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_timestamp));
/* id nodeid remid pid xid exflags flags sts grmode rqmode time_us
r_nodeid r_len r_name */
rv = seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
lkb->lkb_id,
lkb->lkb_nodeid,
lkb->lkb_remid,
lkb->lkb_ownpid,
(unsigned long long)xid,
lkb->lkb_exflags,
lkb->lkb_flags,
lkb->lkb_status,
lkb->lkb_grmode,
lkb->lkb_rqmode,
(unsigned long long)us,
r->res_nodeid,
r->res_length,
r->res_name);
return rv;
}
static int print_format2(struct dlm_rsb *r, struct seq_file *s)
{
struct dlm_lkb *lkb;
int rv = 0;
lock_rsb(r);
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
rv = print_format2_lock(s, lkb, r);
if (rv)
goto out;
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
rv = print_format2_lock(s, lkb, r);
if (rv)
goto out;
}
list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
rv = print_format2_lock(s, lkb, r);
if (rv)
goto out;
}
out:
unlock_rsb(r);
return rv;
}
static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
int rsb_lookup)
{
u64 xid = 0;
int rv;
if (lkb->lkb_flags & DLM_IFL_USER) {
if (lkb->lkb_ua)
xid = lkb->lkb_ua->xid;
}
rv = seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
lkb->lkb_id,
lkb->lkb_nodeid,
lkb->lkb_remid,
lkb->lkb_ownpid,
(unsigned long long)xid,
lkb->lkb_exflags,
lkb->lkb_flags,
lkb->lkb_status,
lkb->lkb_grmode,
lkb->lkb_rqmode,
lkb->lkb_last_bast.mode,
rsb_lookup,
lkb->lkb_wait_type,
lkb->lkb_lvbseq,
(unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
(unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
return rv;
}
static int print_format3(struct dlm_rsb *r, struct seq_file *s)
{
struct dlm_lkb *lkb;
int i, lvblen = r->res_ls->ls_lvblen;
int print_name = 1;
int rv;
lock_rsb(r);
rv = seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
r,
r->res_nodeid,
r->res_first_lkid,
r->res_flags,
!list_empty(&r->res_root_list),
!list_empty(&r->res_recover_list),
r->res_recover_locks_count,
r->res_length);
if (rv)
goto out;
for (i = 0; i < r->res_length; i++) {
if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
print_name = 0;
}
seq_printf(s, "%s", print_name ? "str " : "hex");
for (i = 0; i < r->res_length; i++) {
if (print_name)
seq_printf(s, "%c", r->res_name[i]);
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
rv = seq_puts(s, "\n");
if (rv)
goto out;
if (!r->res_lvbptr)
goto do_locks;
seq_printf(s, "lvb %u %d", r->res_lvbseq, lvblen);
for (i = 0; i < lvblen; i++)
seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
rv = seq_puts(s, "\n");
if (rv)
goto out;
do_locks:
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
rv = print_format3_lock(s, lkb, 0);
if (rv)
goto out;
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
rv = print_format3_lock(s, lkb, 0);
if (rv)
goto out;
}
list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
rv = print_format3_lock(s, lkb, 0);
if (rv)
goto out;
}
list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) {
rv = print_format3_lock(s, lkb, 1);
if (rv)
goto out;
}
out:
unlock_rsb(r);
return rv;
}
static int print_format4(struct dlm_rsb *r, struct seq_file *s)
{
int our_nodeid = dlm_our_nodeid();
int print_name = 1;
int i, rv;
lock_rsb(r);
rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
r,
r->res_nodeid,
r->res_master_nodeid,
r->res_dir_nodeid,
our_nodeid,
r->res_toss_time,
r->res_flags,
r->res_length);
if (rv)
goto out;
for (i = 0; i < r->res_length; i++) {
if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
print_name = 0;
}
seq_printf(s, "%s", print_name ? "str " : "hex");
for (i = 0; i < r->res_length; i++) {
if (print_name)
seq_printf(s, "%c", r->res_name[i]);
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
rv = seq_puts(s, "\n");
out:
unlock_rsb(r);
return rv;
}
struct rsbtbl_iter {
struct dlm_rsb *rsb;
unsigned bucket;
int format;
int header;
};
/* seq_printf returns -1 if the buffer is full, and 0 otherwise.
If the buffer is full, seq_printf can be called again, but it
does nothing and just returns -1. So, the these printing routines
periodically check the return value to avoid wasting too much time
trying to print to a full buffer. */
static int table_seq_show(struct seq_file *seq, void *iter_ptr)
{
struct rsbtbl_iter *ri = iter_ptr;
int rv = 0;
switch (ri->format) {
case 1:
rv = print_format1(ri->rsb, seq);
break;
case 2:
if (ri->header) {
seq_printf(seq, "id nodeid remid pid xid exflags "
"flags sts grmode rqmode time_ms "
"r_nodeid r_len r_name\n");
ri->header = 0;
}
rv = print_format2(ri->rsb, seq);
break;
case 3:
if (ri->header) {
seq_printf(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
ri->header = 0;
}
rv = print_format3(ri->rsb, seq);
break;
case 4:
if (ri->header) {
seq_printf(seq, "version 4 rsb 2\n");
ri->header = 0;
}
rv = print_format4(ri->rsb, seq);
break;
}
return rv;
}
static const struct seq_operations format1_seq_ops;
static const struct seq_operations format2_seq_ops;
static const struct seq_operations format3_seq_ops;
static const struct seq_operations format4_seq_ops;
static void *table_seq_start(struct seq_file *seq, loff_t *pos)
{
struct rb_root *tree;
struct rb_node *node;
struct dlm_ls *ls = seq->private;
struct rsbtbl_iter *ri;
struct dlm_rsb *r;
loff_t n = *pos;
unsigned bucket, entry;
int toss = (seq->op == &format4_seq_ops);
bucket = n >> 32;
entry = n & ((1LL << 32) - 1);
if (bucket >= ls->ls_rsbtbl_size)
return NULL;
ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_NOFS);
if (!ri)
return NULL;
if (n == 0)
ri->header = 1;
if (seq->op == &format1_seq_ops)
ri->format = 1;
if (seq->op == &format2_seq_ops)
ri->format = 2;
if (seq->op == &format3_seq_ops)
ri->format = 3;
if (seq->op == &format4_seq_ops)
ri->format = 4;
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
if (!RB_EMPTY_ROOT(tree)) {
for (node = rb_first(tree); node; node = rb_next(node)) {
r = rb_entry(node, struct dlm_rsb, res_hashnode);
if (!entry--) {
dlm_hold_rsb(r);
ri->rsb = r;
ri->bucket = bucket;
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
return ri;
}
}
}
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
/*
* move to the first rsb in the next non-empty bucket
*/
/* zero the entry */
n &= ~((1LL << 32) - 1);
while (1) {
bucket++;
n += 1LL << 32;
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
if (!RB_EMPTY_ROOT(tree)) {
node = rb_first(tree);
r = rb_entry(node, struct dlm_rsb, res_hashnode);
dlm_hold_rsb(r);
ri->rsb = r;
ri->bucket = bucket;
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
*pos = n;
return ri;
}
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}
}
static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
{
struct dlm_ls *ls = seq->private;
struct rsbtbl_iter *ri = iter_ptr;
struct rb_root *tree;
struct rb_node *next;
struct dlm_rsb *r, *rp;
loff_t n = *pos;
unsigned bucket;
int toss = (seq->op == &format4_seq_ops);
bucket = n >> 32;
/*
* move to the next rsb in the same bucket
*/
spin_lock(&ls->ls_rsbtbl[bucket].lock);
rp = ri->rsb;
next = rb_next(&rp->res_hashnode);
if (next) {
r = rb_entry(next, struct dlm_rsb, res_hashnode);
dlm_hold_rsb(r);
ri->rsb = r;
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
dlm_put_rsb(rp);
++*pos;
return ri;
}
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
dlm_put_rsb(rp);
/*
* move to the first rsb in the next non-empty bucket
*/
/* zero the entry */
n &= ~((1LL << 32) - 1);
while (1) {
bucket++;
n += 1LL << 32;
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
if (!RB_EMPTY_ROOT(tree)) {
next = rb_first(tree);
r = rb_entry(next, struct dlm_rsb, res_hashnode);
dlm_hold_rsb(r);
ri->rsb = r;
ri->bucket = bucket;
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
*pos = n;
return ri;
}
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}
}
static void table_seq_stop(struct seq_file *seq, void *iter_ptr)
{
struct rsbtbl_iter *ri = iter_ptr;
if (ri) {
dlm_put_rsb(ri->rsb);
kfree(ri);
}
}
static const struct seq_operations format1_seq_ops = {
.start = table_seq_start,
.next = table_seq_next,
.stop = table_seq_stop,
.show = table_seq_show,
};
static const struct seq_operations format2_seq_ops = {
.start = table_seq_start,
.next = table_seq_next,
.stop = table_seq_stop,
.show = table_seq_show,
};
static const struct seq_operations format3_seq_ops = {
.start = table_seq_start,
.next = table_seq_next,
.stop = table_seq_stop,
.show = table_seq_show,
};
static const struct seq_operations format4_seq_ops = {
.start = table_seq_start,
.next = table_seq_next,
.stop = table_seq_stop,
.show = table_seq_show,
};
static const struct file_operations format1_fops;
static const struct file_operations format2_fops;
static const struct file_operations format3_fops;
static const struct file_operations format4_fops;
static int table_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int ret = -1;
if (file->f_op == &format1_fops)
ret = seq_open(file, &format1_seq_ops);
else if (file->f_op == &format2_fops)
ret = seq_open(file, &format2_seq_ops);
else if (file->f_op == &format3_fops)
ret = seq_open(file, &format3_seq_ops);
else if (file->f_op == &format4_fops)
ret = seq_open(file, &format4_seq_ops);
if (ret)
return ret;
seq = file->private_data;
seq->private = inode->i_private; /* the dlm_ls */
return 0;
}
static const struct file_operations format1_fops = {
.owner = THIS_MODULE,
.open = table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static const struct file_operations format2_fops = {
.owner = THIS_MODULE,
.open = table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static const struct file_operations format3_fops = {
.owner = THIS_MODULE,
.open = table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static const struct file_operations format4_fops = {
.owner = THIS_MODULE,
.open = table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
/*
* dump lkb's on the ls_waiters list
*/
static ssize_t waiters_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
struct dlm_ls *ls = file->private_data;
struct dlm_lkb *lkb;
size_t len = DLM_DEBUG_BUF_LEN, pos = 0, ret, rv;
mutex_lock(&debug_buf_lock);
mutex_lock(&ls->ls_waiters_mutex);
memset(debug_buf, 0, sizeof(debug_buf));
list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
ret = snprintf(debug_buf + pos, len - pos, "%x %d %d %s\n",
lkb->lkb_id, lkb->lkb_wait_type,
lkb->lkb_nodeid, lkb->lkb_resource->res_name);
if (ret >= len - pos)
break;
pos += ret;
}
mutex_unlock(&ls->ls_waiters_mutex);
rv = simple_read_from_buffer(userbuf, count, ppos, debug_buf, pos);
mutex_unlock(&debug_buf_lock);
return rv;
}
static const struct file_operations waiters_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = waiters_read,
.llseek = default_llseek,
};
void dlm_delete_debug_file(struct dlm_ls *ls)
{
debugfs_remove(ls->ls_debug_rsb_dentry);
debugfs_remove(ls->ls_debug_waiters_dentry);
debugfs_remove(ls->ls_debug_locks_dentry);
debugfs_remove(ls->ls_debug_all_dentry);
debugfs_remove(ls->ls_debug_toss_dentry);
}
int dlm_create_debug_file(struct dlm_ls *ls)
{
char name[DLM_LOCKSPACE_LEN+8];
/* format 1 */
ls->ls_debug_rsb_dentry = debugfs_create_file(ls->ls_name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&format1_fops);
if (!ls->ls_debug_rsb_dentry)
goto fail;
/* format 2 */
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
ls->ls_debug_locks_dentry = debugfs_create_file(name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&format2_fops);
if (!ls->ls_debug_locks_dentry)
goto fail;
/* format 3 */
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_all", ls->ls_name);
ls->ls_debug_all_dentry = debugfs_create_file(name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&format3_fops);
if (!ls->ls_debug_all_dentry)
goto fail;
/* format 4 */
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_toss", ls->ls_name);
ls->ls_debug_toss_dentry = debugfs_create_file(name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&format4_fops);
if (!ls->ls_debug_toss_dentry)
goto fail;
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
ls->ls_debug_waiters_dentry = debugfs_create_file(name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&waiters_fops);
if (!ls->ls_debug_waiters_dentry)
goto fail;
return 0;
fail:
dlm_delete_debug_file(ls);
return -ENOMEM;
}
int __init dlm_register_debugfs(void)
{
mutex_init(&debug_buf_lock);
dlm_root = debugfs_create_dir("dlm", NULL);
return dlm_root ? 0 : -ENOMEM;
}
void dlm_unregister_debugfs(void)
{
debugfs_remove(dlm_root);
}

308
fs/dlm/dir.c Normal file
View file

@ -0,0 +1,308 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "lowcomms.h"
#include "rcom.h"
#include "config.h"
#include "memory.h"
#include "recover.h"
#include "util.h"
#include "lock.h"
#include "dir.h"
/*
* We use the upper 16 bits of the hash value to select the directory node.
* Low bits are used for distribution of rsb's among hash buckets on each node.
*
* To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
* num_nodes to the hash value. This value in the desired range is used as an
* offset into the sorted list of nodeid's to give the particular nodeid.
*/
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
uint32_t node;
if (ls->ls_num_nodes == 1)
return dlm_our_nodeid();
else {
node = (hash >> 16) % ls->ls_total_weight;
return ls->ls_node_array[node];
}
}
int dlm_dir_nodeid(struct dlm_rsb *r)
{
return r->res_dir_nodeid;
}
void dlm_recover_dir_nodeid(struct dlm_ls *ls)
{
struct dlm_rsb *r;
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
}
up_read(&ls->ls_root_sem);
}
int dlm_recover_directory(struct dlm_ls *ls)
{
struct dlm_member *memb;
char *b, *last_name = NULL;
int error = -ENOMEM, last_len, nodeid, result;
uint16_t namelen;
unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
log_rinfo(ls, "dlm_recover_directory");
if (dlm_no_directory(ls))
goto out_status;
last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
if (!last_name)
goto out;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->nodeid == dlm_our_nodeid())
continue;
memset(last_name, 0, DLM_RESNAME_MAXLEN);
last_len = 0;
for (;;) {
int left;
error = dlm_recovery_stopped(ls);
if (error)
goto out_free;
error = dlm_rcom_names(ls, memb->nodeid,
last_name, last_len);
if (error)
goto out_free;
cond_resched();
/*
* pick namelen/name pairs out of received buffer
*/
b = ls->ls_recover_buf->rc_buf;
left = ls->ls_recover_buf->rc_header.h_length;
left -= sizeof(struct dlm_rcom);
for (;;) {
__be16 v;
error = -EINVAL;
if (left < sizeof(__be16))
goto out_free;
memcpy(&v, b, sizeof(__be16));
namelen = be16_to_cpu(v);
b += sizeof(__be16);
left -= sizeof(__be16);
/* namelen of 0xFFFFF marks end of names for
this node; namelen of 0 marks end of the
buffer */
if (namelen == 0xFFFF)
goto done;
if (!namelen)
break;
if (namelen > left)
goto out_free;
if (namelen > DLM_RESNAME_MAXLEN)
goto out_free;
error = dlm_master_lookup(ls, memb->nodeid,
b, namelen,
DLM_LU_RECOVER_DIR,
&nodeid, &result);
if (error) {
log_error(ls, "recover_dir lookup %d",
error);
goto out_free;
}
/* The name was found in rsbtbl, but the
* master nodeid is different from
* memb->nodeid which says it is the master.
* This should not happen. */
if (result == DLM_LU_MATCH &&
nodeid != memb->nodeid) {
count_bad++;
log_error(ls, "recover_dir lookup %d "
"nodeid %d memb %d bad %u",
result, nodeid, memb->nodeid,
count_bad);
print_hex_dump_bytes("dlm_recover_dir ",
DUMP_PREFIX_NONE,
b, namelen);
}
/* The name was found in rsbtbl, and the
* master nodeid matches memb->nodeid. */
if (result == DLM_LU_MATCH &&
nodeid == memb->nodeid) {
count_match++;
}
/* The name was not found in rsbtbl and was
* added with memb->nodeid as the master. */
if (result == DLM_LU_ADD) {
count_add++;
}
last_len = namelen;
memcpy(last_name, b, namelen);
b += namelen;
left -= namelen;
count++;
}
}
done:
;
}
out_status:
error = 0;
dlm_set_recover_status(ls, DLM_RS_DIR);
log_rinfo(ls, "dlm_recover_directory %u in %u new",
count, count_add);
out_free:
kfree(last_name);
out:
return error;
}
static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
{
struct dlm_rsb *r;
uint32_t hash, bucket;
int rv;
hash = jhash(name, len, 0);
bucket = hash & (ls->ls_rsbtbl_size - 1);
spin_lock(&ls->ls_rsbtbl[bucket].lock);
rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
if (rv)
rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
name, len, &r);
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
if (!rv)
return r;
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (len == r->res_length && !memcmp(name, r->res_name, len)) {
up_read(&ls->ls_root_sem);
log_debug(ls, "find_rsb_root revert to root_list %s",
r->res_name);
return r;
}
}
up_read(&ls->ls_root_sem);
return NULL;
}
/* Find the rsb where we left off (or start again), then send rsb names
for rsb's we're master of and whose directory node matches the requesting
node. inbuf is the rsb name last sent, inlen is the name's length */
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
char *outbuf, int outlen, int nodeid)
{
struct list_head *list;
struct dlm_rsb *r;
int offset = 0, dir_nodeid;
__be16 be_namelen;
down_read(&ls->ls_root_sem);
if (inlen > 1) {
r = find_rsb_root(ls, inbuf, inlen);
if (!r) {
inbuf[inlen - 1] = '\0';
log_error(ls, "copy_master_names from %d start %d %s",
nodeid, inlen, inbuf);
goto out;
}
list = r->res_root_list.next;
} else {
list = ls->ls_root_list.next;
}
for (offset = 0; list != &ls->ls_root_list; list = list->next) {
r = list_entry(list, struct dlm_rsb, res_root_list);
if (r->res_nodeid)
continue;
dir_nodeid = dlm_dir_nodeid(r);
if (dir_nodeid != nodeid)
continue;
/*
* The block ends when we can't fit the following in the
* remaining buffer space:
* namelen (uint16_t) +
* name (r->res_length) +
* end-of-block record 0x0000 (uint16_t)
*/
if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
/* Write end-of-block record */
be_namelen = cpu_to_be16(0);
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
offset += sizeof(__be16);
ls->ls_recover_dir_sent_msg++;
goto out;
}
be_namelen = cpu_to_be16(r->res_length);
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
offset += sizeof(__be16);
memcpy(outbuf + offset, r->res_name, r->res_length);
offset += r->res_length;
ls->ls_recover_dir_sent_res++;
}
/*
* If we've reached the end of the list (and there's room) write a
* terminating record.
*/
if ((list == &ls->ls_root_list) &&
(offset + sizeof(uint16_t) <= outlen)) {
be_namelen = cpu_to_be16(0xFFFF);
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
offset += sizeof(__be16);
ls->ls_recover_dir_sent_msg++;
}
out:
up_read(&ls->ls_root_sem);
}

25
fs/dlm/dir.h Normal file
View file

@ -0,0 +1,25 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__
int dlm_dir_nodeid(struct dlm_rsb *rsb);
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash);
void dlm_recover_dir_nodeid(struct dlm_ls *ls);
int dlm_recover_directory(struct dlm_ls *ls);
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
char *outbuf, int outlen, int nodeid);
#endif /* __DIR_DOT_H__ */

729
fs/dlm/dlm_internal.h Normal file
View file

@ -0,0 +1,729 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __DLM_INTERNAL_DOT_H__
#define __DLM_INTERNAL_DOT_H__
/*
* This is the main header file to be included in each DLM source file.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/socket.h>
#include <linux/kthread.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/ratelimit.h>
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include "config.h"
/* Size of the temp buffer midcomms allocates on the stack.
We try to make this large enough so most messages fit.
FIXME: should sctp make this unnecessary? */
#define DLM_INBUF_LEN 148
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_rsbtable;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
struct dlm_rcom;
struct dlm_mhandle;
#define log_print(fmt, args...) \
printk(KERN_ERR "dlm: "fmt"\n" , ##args)
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
#define log_rinfo(ls, fmt, args...) \
printk(KERN_INFO "dlm: %s: " fmt "\n", (ls)->ls_name , ##args);
#define log_debug(ls, fmt, args...) \
do { \
if (dlm_config.ci_log_debug) \
printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
(ls)->ls_name , ##args); \
} while (0)
#define log_limit(ls, fmt, args...) \
do { \
if (dlm_config.ci_log_debug) \
printk_ratelimited(KERN_DEBUG "dlm: %s: " fmt "\n", \
(ls)->ls_name , ##args); \
} while (0)
#define DLM_ASSERT(x, do) \
{ \
if (!(x)) \
{ \
printk(KERN_ERR "\nDLM: Assertion failed on line %d of file %s\n" \
"DLM: assertion: \"%s\"\n" \
"DLM: time = %lu\n", \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
#define DLM_RTF_SHRINK 0x00000001
struct dlm_rsbtable {
struct rb_root keep;
struct rb_root toss;
spinlock_t lock;
uint32_t flags;
};
/*
* Lockspace member (per node in a ls)
*/
struct dlm_member {
struct list_head list;
int nodeid;
int weight;
int slot;
int slot_prev;
int comm_seq;
uint32_t generation;
};
/*
* Save and manage recovery state for a lockspace.
*/
struct dlm_recover {
struct list_head list;
struct dlm_config_node *nodes;
int nodes_count;
uint64_t seq;
};
/*
* Pass input args to second stage locking function.
*/
struct dlm_args {
uint32_t flags;
void (*astfn) (void *astparam);
void *astparam;
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
unsigned long timeout;
};
/*
* Lock block
*
* A lock can be one of three types:
*
* local copy lock is mastered locally
* (lkb_nodeid is zero and DLM_LKF_MSTCPY is not set)
* process copy lock is mastered on a remote node
* (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is not set)
* master copy master node's copy of a lock owned by remote node
* (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is set)
*
* lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or
* dlm_unlock. The dlm does not modify these or use any private flags in
* this field; it only contains DLM_LKF_ flags from dlm.h. These flags
* are sent as-is to the remote master when the lock is remote.
*
* lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h.
* Some internal flags are shared between the master and process nodes;
* these shared flags are kept in the lower two bytes. One of these
* flags set on the master copy will be propagated to the process copy
* and v.v. Other internal flags are private to the master or process
* node (e.g. DLM_IFL_MSTCPY). These are kept in the high two bytes.
*
* lkb_sbflags: status block flags. These flags are copied directly into
* the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion
* ast. All defined in dlm.h with DLM_SBF_ prefix.
*
* lkb_status: the lock status indicates which rsb queue the lock is
* on, grant, convert, or wait. DLM_LKSTS_ WAITING/GRANTED/CONVERT
*
* lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a
* reply is needed. Only set when the lkb is on the lockspace waiters
* list awaiting a reply from a remote node.
*
* lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb
* is a master copy, nodeid specifies the remote lock holder, when the
* lkb is a process copy, the nodeid specifies the lock master.
*/
/* lkb_status */
#define DLM_LKSTS_WAITING 1
#define DLM_LKSTS_GRANTED 2
#define DLM_LKSTS_CONVERT 3
/* lkb_flags */
#define DLM_IFL_MSTCPY 0x00010000
#define DLM_IFL_RESEND 0x00020000
#define DLM_IFL_DEAD 0x00040000
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
#define DLM_CALLBACKS_SIZE 6
#define DLM_CB_CAST 0x00000001
#define DLM_CB_BAST 0x00000002
#define DLM_CB_SKIP 0x00000004
struct dlm_callback {
uint64_t seq;
uint32_t flags; /* DLM_CBF_ */
int sb_status; /* copy to lksb status */
uint8_t sb_flags; /* copy to lksb flags */
int8_t mode; /* rq mode of bast, gr mode of cast */
};
struct dlm_lkb {
struct dlm_rsb *lkb_resource; /* the rsb */
struct kref lkb_ref;
int lkb_nodeid; /* copied from rsb */
int lkb_ownpid; /* pid of lock owner */
uint32_t lkb_id; /* our lock ID */
uint32_t lkb_remid; /* lock ID on remote partner */
uint32_t lkb_exflags; /* external flags from caller */
uint32_t lkb_sbflags; /* lksb flags */
uint32_t lkb_flags; /* internal flags */
uint32_t lkb_lvbseq; /* lvb sequence number */
int8_t lkb_status; /* granted, waiting, convert */
int8_t lkb_rqmode; /* requested lock mode */
int8_t lkb_grmode; /* granted lock mode */
int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */
int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */
struct list_head lkb_statequeue; /* rsb g/c/w list */
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
ktime_t lkb_wait_time;
unsigned long lkb_timeout_cs;
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
struct dlm_callback lkb_last_cast;
struct dlm_callback lkb_last_bast;
ktime_t lkb_last_cast_time; /* for debugging */
ktime_t lkb_last_bast_time; /* for debugging */
uint64_t lkb_recover_seq; /* from ls_recover_seq */
char *lkb_lvbptr;
struct dlm_lksb *lkb_lksb; /* caller's status block */
void (*lkb_astfn) (void *astparam);
void (*lkb_bastfn) (void *astparam, int mode);
union {
void *lkb_astparam; /* caller's ast arg */
struct dlm_user_args *lkb_ua;
};
};
/*
* res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real
* nodeid, even when nodeid is our_nodeid.
*
* res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid,
* greater than zero when another nodeid.
*
* (TODO: remove res_nodeid and only use res_master_nodeid)
*/
struct dlm_rsb {
struct dlm_ls *res_ls; /* the lockspace */
struct kref res_ref;
struct mutex res_mutex;
unsigned long res_flags;
int res_length; /* length of rsb name */
int res_nodeid;
int res_master_nodeid;
int res_dir_nodeid;
int res_id; /* for ls_recover_idr */
uint32_t res_lvbseq;
uint32_t res_hash;
uint32_t res_bucket; /* rsbtbl */
unsigned long res_toss_time;
uint32_t res_first_lkid;
struct list_head res_lookup; /* lkbs waiting on first */
union {
struct list_head res_hashchain;
struct rb_node res_hashnode; /* rsbtbl */
};
struct list_head res_grantqueue;
struct list_head res_convertqueue;
struct list_head res_waitqueue;
struct list_head res_root_list; /* used for recovery */
struct list_head res_recover_list; /* used for recovery */
int res_recover_locks_count;
char *res_lvbptr;
char res_name[DLM_RESNAME_MAXLEN+1];
};
/* dlm_master_lookup() flags */
#define DLM_LU_RECOVER_DIR 1
#define DLM_LU_RECOVER_MASTER 2
/* dlm_master_lookup() results */
#define DLM_LU_MATCH 1
#define DLM_LU_ADD 2
/* find_rsb() flags */
#define R_REQUEST 0x00000001
#define R_RECEIVE_REQUEST 0x00000002
#define R_RECEIVE_RECOVER 0x00000004
/* rsb_flags */
enum rsb_flags {
RSB_MASTER_UNCERTAIN,
RSB_VALNOTVALID,
RSB_VALNOTVALID_PREV,
RSB_NEW_MASTER,
RSB_NEW_MASTER2,
RSB_RECOVER_CONVERT,
RSB_RECOVER_GRANT,
RSB_RECOVER_LVB_INVAL,
};
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
__set_bit(flag, &r->res_flags);
}
static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
__clear_bit(flag, &r->res_flags);
}
static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
return test_bit(flag, &r->res_flags);
}
/* dlm_header is first element of all structs sent between nodes */
#define DLM_HEADER_MAJOR 0x00030000
#define DLM_HEADER_MINOR 0x00000001
#define DLM_HEADER_SLOTS 0x00000001
#define DLM_MSG 1
#define DLM_RCOM 2
struct dlm_header {
uint32_t h_version;
uint32_t h_lockspace;
uint32_t h_nodeid; /* nodeid of sender */
uint16_t h_length;
uint8_t h_cmd; /* DLM_MSG, DLM_RCOM */
uint8_t h_pad;
};
#define DLM_MSG_REQUEST 1
#define DLM_MSG_CONVERT 2
#define DLM_MSG_UNLOCK 3
#define DLM_MSG_CANCEL 4
#define DLM_MSG_REQUEST_REPLY 5
#define DLM_MSG_CONVERT_REPLY 6
#define DLM_MSG_UNLOCK_REPLY 7
#define DLM_MSG_CANCEL_REPLY 8
#define DLM_MSG_GRANT 9
#define DLM_MSG_BAST 10
#define DLM_MSG_LOOKUP 11
#define DLM_MSG_REMOVE 12
#define DLM_MSG_LOOKUP_REPLY 13
#define DLM_MSG_PURGE 14
struct dlm_message {
struct dlm_header m_header;
uint32_t m_type; /* DLM_MSG_ */
uint32_t m_nodeid;
uint32_t m_pid;
uint32_t m_lkid; /* lkid on sender */
uint32_t m_remid; /* lkid on receiver */
uint32_t m_parent_lkid;
uint32_t m_parent_remid;
uint32_t m_exflags;
uint32_t m_sbflags;
uint32_t m_flags;
uint32_t m_lvbseq;
uint32_t m_hash;
int m_status;
int m_grmode;
int m_rqmode;
int m_bastmode;
int m_asts;
int m_result; /* 0 or -EXXX */
char m_extra[0]; /* name or lvb */
};
#define DLM_RS_NODES 0x00000001
#define DLM_RS_NODES_ALL 0x00000002
#define DLM_RS_DIR 0x00000004
#define DLM_RS_DIR_ALL 0x00000008
#define DLM_RS_LOCKS 0x00000010
#define DLM_RS_LOCKS_ALL 0x00000020
#define DLM_RS_DONE 0x00000040
#define DLM_RS_DONE_ALL 0x00000080
#define DLM_RCOM_STATUS 1
#define DLM_RCOM_NAMES 2
#define DLM_RCOM_LOOKUP 3
#define DLM_RCOM_LOCK 4
#define DLM_RCOM_STATUS_REPLY 5
#define DLM_RCOM_NAMES_REPLY 6
#define DLM_RCOM_LOOKUP_REPLY 7
#define DLM_RCOM_LOCK_REPLY 8
struct dlm_rcom {
struct dlm_header rc_header;
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
uint64_t rc_seq; /* sender's ls_recover_seq */
uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
union dlm_packet {
struct dlm_header header; /* common to other two */
struct dlm_message message;
struct dlm_rcom rcom;
};
#define DLM_RSF_NEED_SLOTS 0x00000001
/* RCOM_STATUS data */
struct rcom_status {
__le32 rs_flags;
__le32 rs_unused1;
__le64 rs_unused2;
};
/* RCOM_STATUS_REPLY data */
struct rcom_config {
__le32 rf_lvblen;
__le32 rf_lsflags;
/* DLM_HEADER_SLOTS adds: */
__le32 rf_flags;
__le16 rf_our_slot;
__le16 rf_num_slots;
__le32 rf_generation;
__le32 rf_unused1;
__le64 rf_unused2;
};
struct rcom_slot {
__le32 ro_nodeid;
__le16 ro_slot;
__le16 ro_unused1;
__le64 ro_unused2;
};
struct rcom_lock {
__le32 rl_ownpid;
__le32 rl_lkid;
__le32 rl_remid;
__le32 rl_parent_lkid;
__le32 rl_parent_remid;
__le32 rl_exflags;
__le32 rl_flags;
__le32 rl_lvbseq;
__le32 rl_result;
int8_t rl_rqmode;
int8_t rl_grmode;
int8_t rl_status;
int8_t rl_asts;
__le16 rl_wait_type;
__le16 rl_namelen;
char rl_name[DLM_RESNAME_MAXLEN];
char rl_lvb[0];
};
/*
* The max number of resources per rsbtbl bucket that shrink will attempt
* to remove in each iteration.
*/
#define DLM_REMOVE_NAMES_MAX 8
struct dlm_ls {
struct list_head ls_list; /* list of lockspaces */
dlm_lockspace_t *ls_local_handle;
uint32_t ls_global_id; /* global unique lockspace ID */
uint32_t ls_generation;
uint32_t ls_exflags;
int ls_lvblen;
int ls_count; /* refcount of processes in
the dlm using this ls */
int ls_create_count; /* create/release refcount */
unsigned long ls_flags; /* LSFL_ */
unsigned long ls_scan_time;
struct kobject ls_kobj;
struct idr ls_lkbidr;
spinlock_t ls_lkbidr_spin;
struct dlm_rsbtable *ls_rsbtbl;
uint32_t ls_rsbtbl_size;
struct mutex ls_waiters_mutex;
struct list_head ls_waiters; /* lkbs needing a reply */
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
struct list_head ls_new_rsb; /* new rsb structs */
spinlock_t ls_remove_spin;
char ls_remove_name[DLM_RESNAME_MAXLEN+1];
char *ls_remove_names[DLM_REMOVE_NAMES_MAX];
int ls_remove_len;
int ls_remove_lens[DLM_REMOVE_NAMES_MAX];
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
int ls_low_nodeid;
int ls_total_weight;
int *ls_node_array;
int ls_slot;
int ls_num_slots;
int ls_slots_size;
struct dlm_slot *ls_slots;
struct dlm_rsb ls_stub_rsb; /* for returning errors */
struct dlm_lkb ls_stub_lkb; /* for returning errors */
struct dlm_message ls_stub_ms; /* for faking a reply */
struct dentry *ls_debug_rsb_dentry; /* debugfs */
struct dentry *ls_debug_waiters_dentry; /* debugfs */
struct dentry *ls_debug_locks_dentry; /* debugfs */
struct dentry *ls_debug_all_dentry; /* debugfs */
struct dentry *ls_debug_toss_dentry; /* debugfs */
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
struct completion ls_members_done;
int ls_members_result;
struct miscdevice ls_device;
struct workqueue_struct *ls_callback_wq;
/* recovery related */
struct mutex ls_cb_mutex;
struct list_head ls_cb_delay; /* save for queue_work later */
struct timer_list ls_timer;
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
spinlock_t ls_recover_lock;
unsigned long ls_recover_begin; /* jiffies timestamp */
uint32_t ls_recover_status; /* DLM_RS_ */
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
struct rw_semaphore ls_in_recovery; /* block local requests */
struct rw_semaphore ls_recv_active; /* block dlm_recv */
struct list_head ls_requestqueue;/* queue remote requests */
struct mutex ls_requestqueue_mutex;
struct dlm_rcom *ls_recover_buf;
int ls_recover_nodeid; /* for debugging */
unsigned int ls_recover_dir_sent_res; /* for log info */
unsigned int ls_recover_dir_sent_msg; /* for log info */
unsigned int ls_recover_locks_in; /* for log info */
uint64_t ls_rcom_seq;
spinlock_t ls_rcom_spin;
struct list_head ls_recover_list;
spinlock_t ls_recover_list_lock;
int ls_recover_list_count;
struct idr ls_recover_idr;
spinlock_t ls_recover_idr_lock;
wait_queue_head_t ls_wait_general;
wait_queue_head_t ls_recover_lock_wait;
struct mutex ls_clear_proc_locks;
struct list_head ls_root_list; /* root resources */
struct rw_semaphore ls_root_sem; /* protect root_list */
const struct dlm_lockspace_ops *ls_ops;
void *ls_ops_arg;
int ls_namelen;
char ls_name[1];
};
/*
* LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
* that they should abort what they're doing so new recovery can be started.
*
* LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
* should do down_write() on the in_recovery rw_semaphore. (doing down_write
* within dlm_ls_stop causes complaints about the lock acquired/released
* in different contexts.)
*
* LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
* It sets this after it is done with down_write() on the in_recovery
* rw_semaphore and clears it after it has released the rw_semaphore.
*
* LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
* should begin recovery of the lockspace.
*
* LSFL_RUNNING - set when normal locking activity is enabled.
* dlm_ls_stop() clears this to tell dlm locking routines that they should
* quit what they are doing so recovery can run. dlm_recoverd sets
* this after recovery is finished.
*/
#define LSFL_RECOVER_STOP 0
#define LSFL_RECOVER_DOWN 1
#define LSFL_RECOVER_LOCK 2
#define LSFL_RECOVER_WORK 3
#define LSFL_RUNNING 4
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
#define LSFL_TIMEWARN 8
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
struct dlm_user_args {
struct dlm_user_proc *proc; /* each process that opens the lockspace
device has private data
(dlm_user_proc) on the struct file,
the process's locks point back to it*/
struct dlm_lksb lksb;
struct dlm_lksb __user *user_lksb;
void __user *castparam;
void __user *castaddr;
void __user *bastparam;
void __user *bastaddr;
uint64_t xid;
};
#define DLM_PROC_FLAGS_CLOSING 1
#define DLM_PROC_FLAGS_COMPAT 2
/* locks list is kept so we can remove all a process's locks when it
exits (or orphan those that are persistent) */
struct dlm_user_proc {
dlm_lockspace_t *lockspace;
unsigned long flags; /* DLM_PROC_FLAGS */
struct list_head asts;
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
struct list_head unlocking;
wait_queue_head_t wait;
};
static inline int dlm_locking_stopped(struct dlm_ls *ls)
{
return !test_bit(LSFL_RUNNING, &ls->ls_flags);
}
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{
return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
}
static inline int dlm_no_directory(struct dlm_ls *ls)
{
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
int dlm_plock_init(void);
void dlm_plock_exit(void);
#ifdef CONFIG_DLM_DEBUG
int dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
static inline int dlm_register_debugfs(void) { return 0; }
static inline void dlm_unregister_debugfs(void) { }
static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif
#endif /* __DLM_INTERNAL_DOT_H__ */

6232
fs/dlm/lock.c Normal file

File diff suppressed because it is too large Load diff

77
fs/dlm/lock.h Normal file
View file

@ -0,0 +1,77 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOCK_DOT_H__
#define __LOCK_DOT_H__
void dlm_dump_rsb(struct dlm_rsb *r);
void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len);
void dlm_print_lkb(struct dlm_lkb *lkb);
void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms,
uint32_t saved_seq);
void dlm_receive_buffer(union dlm_packet *p, int nodeid);
int dlm_modes_compat(int mode1, int mode2);
void dlm_put_rsb(struct dlm_rsb *r);
void dlm_hold_rsb(struct dlm_rsb *r);
int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
void dlm_scan_waiters(struct dlm_ls *ls);
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
struct dlm_rsb **r_ret);
void dlm_recover_purge(struct dlm_ls *ls);
void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
void dlm_recover_grant(struct dlm_ls *ls);
int dlm_recover_waiters_post(struct dlm_ls *ls);
void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid);
int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
int nodeid, int pid);
int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
static inline int is_master(struct dlm_rsb *r)
{
return !r->res_nodeid;
}
static inline void lock_rsb(struct dlm_rsb *r)
{
mutex_lock(&r->res_mutex);
}
static inline void unlock_rsb(struct dlm_rsb *r)
{
mutex_unlock(&r->res_mutex);
}
#endif

917
fs/dlm/lockspace.c Normal file
View file

@ -0,0 +1,917 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
#include "user.h"
#include "ast.h"
static int ls_count;
static struct mutex ls_lock;
static struct list_head lslist;
static spinlock_t lslist_lock;
static struct task_struct * scand_task;
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
ssize_t ret = len;
int n;
int rc = kstrtoint(buf, 0, &n);
if (rc)
return rc;
ls = dlm_find_lockspace_local(ls->ls_local_handle);
if (!ls)
return -EINVAL;
switch (n) {
case 0:
dlm_ls_stop(ls);
break;
case 1:
dlm_ls_start(ls);
break;
default:
ret = -EINVAL;
}
dlm_put_lockspace(ls);
return ret;
}
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);
if (rc)
return rc;
set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
wake_up(&ls->ls_uevent_wait);
return len;
}
static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}
static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
int rc = kstrtouint(buf, 0, &ls->ls_global_id);
if (rc)
return rc;
return len;
}
static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
}
static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
{
int val;
int rc = kstrtoint(buf, 0, &val);
if (rc)
return rc;
if (val == 1)
set_bit(LSFL_NODIR, &ls->ls_flags);
return len;
}
static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
uint32_t status = dlm_recover_status(ls);
return snprintf(buf, PAGE_SIZE, "%x\n", status);
}
static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}
struct dlm_attr {
struct attribute attr;
ssize_t (*show)(struct dlm_ls *, char *);
ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};
static struct dlm_attr dlm_attr_control = {
.attr = {.name = "control", .mode = S_IWUSR},
.store = dlm_control_store
};
static struct dlm_attr dlm_attr_event = {
.attr = {.name = "event_done", .mode = S_IWUSR},
.store = dlm_event_store
};
static struct dlm_attr dlm_attr_id = {
.attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
.show = dlm_id_show,
.store = dlm_id_store
};
static struct dlm_attr dlm_attr_nodir = {
.attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
.show = dlm_nodir_show,
.store = dlm_nodir_store
};
static struct dlm_attr dlm_attr_recover_status = {
.attr = {.name = "recover_status", .mode = S_IRUGO},
.show = dlm_recover_status_show
};
static struct dlm_attr dlm_attr_recover_nodeid = {
.attr = {.name = "recover_nodeid", .mode = S_IRUGO},
.show = dlm_recover_nodeid_show
};
static struct attribute *dlm_attrs[] = {
&dlm_attr_control.attr,
&dlm_attr_event.attr,
&dlm_attr_id.attr,
&dlm_attr_nodir.attr,
&dlm_attr_recover_status.attr,
&dlm_attr_recover_nodeid.attr,
NULL,
};
static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
return a->show ? a->show(ls, buf) : 0;
}
static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
return a->store ? a->store(ls, buf, len) : len;
}
static void lockspace_kobj_release(struct kobject *k)
{
struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
kfree(ls);
}
static const struct sysfs_ops dlm_attr_ops = {
.show = dlm_attr_show,
.store = dlm_attr_store,
};
static struct kobj_type dlm_ktype = {
.default_attrs = dlm_attrs,
.sysfs_ops = &dlm_attr_ops,
.release = lockspace_kobj_release,
};
static struct kset *dlm_kset;
static int do_uevent(struct dlm_ls *ls, int in)
{
int error;
if (in)
kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
else
kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");
/* dlm_controld will see the uevent, do the necessary group management
and then write to sysfs to wake us */
error = wait_event_interruptible(ls->ls_uevent_wait,
test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result);
if (error)
goto out;
error = ls->ls_uevent_result;
out:
if (error)
log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
error, ls->ls_uevent_result);
return error;
}
static int dlm_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
return 0;
}
static struct kset_uevent_ops dlm_uevent_ops = {
.uevent = dlm_uevent,
};
int __init dlm_lockspace_init(void)
{
ls_count = 0;
mutex_init(&ls_lock);
INIT_LIST_HEAD(&lslist);
spin_lock_init(&lslist_lock);
dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
if (!dlm_kset) {
printk(KERN_WARNING "%s: can not create kset\n", __func__);
return -ENOMEM;
}
return 0;
}
void dlm_lockspace_exit(void)
{
kset_unregister(dlm_kset);
}
static struct dlm_ls *find_ls_to_scan(void)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (time_after_eq(jiffies, ls->ls_scan_time +
dlm_config.ci_scan_secs * HZ)) {
spin_unlock(&lslist_lock);
return ls;
}
}
spin_unlock(&lslist_lock);
return NULL;
}
static int dlm_scand(void *data)
{
struct dlm_ls *ls;
while (!kthread_should_stop()) {
ls = find_ls_to_scan();
if (ls) {
if (dlm_lock_recovery_try(ls)) {
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
}
continue;
}
schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
static int dlm_scand_start(void)
{
struct task_struct *p;
int error = 0;
p = kthread_run(dlm_scand, NULL, "dlm_scand");
if (IS_ERR(p))
error = PTR_ERR(p);
else
scand_task = p;
return error;
}
static void dlm_scand_stop(void)
{
kthread_stop(scand_task);
}
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_global_id == id) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_local_handle == lockspace) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
struct dlm_ls *dlm_find_lockspace_device(int minor)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_device.minor == minor) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
void dlm_put_lockspace(struct dlm_ls *ls)
{
spin_lock(&lslist_lock);
ls->ls_count--;
spin_unlock(&lslist_lock);
}
static void remove_lockspace(struct dlm_ls *ls)
{
for (;;) {
spin_lock(&lslist_lock);
if (ls->ls_count == 0) {
WARN_ON(ls->ls_create_count != 0);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
return;
}
spin_unlock(&lslist_lock);
ssleep(1);
}
}
static int threads_start(void)
{
int error;
error = dlm_scand_start();
if (error) {
log_print("cannot start dlm_scand thread %d", error);
goto fail;
}
/* Thread for sending/receiving messages for all lockspace's */
error = dlm_lowcomms_start();
if (error) {
log_print("cannot start dlm lowcomms %d", error);
goto scand_fail;
}
return 0;
scand_fail:
dlm_scand_stop();
fail:
return error;
}
static void threads_stop(void)
{
dlm_scand_stop();
dlm_lowcomms_stop();
}
static int new_lockspace(const char *name, const char *cluster,
uint32_t flags, int lvblen,
const struct dlm_lockspace_ops *ops, void *ops_arg,
int *ops_result, dlm_lockspace_t **lockspace)
{
struct dlm_ls *ls;
int i, size, error;
int do_unreg = 0;
int namelen = strlen(name);
if (namelen > DLM_LOCKSPACE_LEN)
return -EINVAL;
if (!lvblen || (lvblen % 8))
return -EINVAL;
if (!try_module_get(THIS_MODULE))
return -EINVAL;
if (!dlm_user_daemon_available()) {
log_print("dlm user daemon not available");
error = -EUNATCH;
goto out;
}
if (ops && ops_result) {
if (!dlm_config.ci_recover_callbacks)
*ops_result = -EOPNOTSUPP;
else
*ops_result = 0;
}
if (dlm_config.ci_recover_callbacks && cluster &&
strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
log_print("dlm cluster name %s mismatch %s",
dlm_config.ci_cluster_name, cluster);
error = -EBADR;
goto out;
}
error = 0;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
WARN_ON(ls->ls_create_count <= 0);
if (ls->ls_namelen != namelen)
continue;
if (memcmp(ls->ls_name, name, namelen))
continue;
if (flags & DLM_LSFL_NEWEXCL) {
error = -EEXIST;
break;
}
ls->ls_create_count++;
*lockspace = ls;
error = 1;
break;
}
spin_unlock(&lslist_lock);
if (error)
goto out;
error = -ENOMEM;
ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
if (!ls)
goto out;
memcpy(ls->ls_name, name, namelen);
ls->ls_namelen = namelen;
ls->ls_lvblen = lvblen;
ls->ls_count = 0;
ls->ls_flags = 0;
ls->ls_scan_time = jiffies;
if (ops && dlm_config.ci_recover_callbacks) {
ls->ls_ops = ops;
ls->ls_ops_arg = ops_arg;
}
if (flags & DLM_LSFL_TIMEWARN)
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
/* ls_exflags are forced to match among nodes, and we don't
need to require all nodes to have some flags set */
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
if (!ls->ls_rsbtbl)
goto out_lsfree;
for (i = 0; i < size; i++) {
ls->ls_rsbtbl[i].keep.rb_node = NULL;
ls->ls_rsbtbl[i].toss.rb_node = NULL;
spin_lock_init(&ls->ls_rsbtbl[i].lock);
}
spin_lock_init(&ls->ls_remove_spin);
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
GFP_KERNEL);
if (!ls->ls_remove_names[i])
goto out_rsbtbl;
}
idr_init(&ls->ls_lkbidr);
spin_lock_init(&ls->ls_lkbidr_spin);
INIT_LIST_HEAD(&ls->ls_waiters);
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone);
ls->ls_num_nodes = 0;
ls->ls_low_nodeid = 0;
ls->ls_total_weight = 0;
ls->ls_node_array = NULL;
memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
ls->ls_stub_rsb.res_ls = ls;
ls->ls_debug_rsb_dentry = NULL;
ls->ls_debug_waiters_dentry = NULL;
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
spin_lock_init(&ls->ls_recover_lock);
spin_lock_init(&ls->ls_rcom_spin);
get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
ls->ls_recover_status = 0;
ls->ls_recover_seq = 0;
ls->ls_recover_args = NULL;
init_rwsem(&ls->ls_in_recovery);
init_rwsem(&ls->ls_recv_active);
INIT_LIST_HEAD(&ls->ls_requestqueue);
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
if (!ls->ls_recover_buf)
goto out_lkbidr;
ls->ls_slot = 0;
ls->ls_num_slots = 0;
ls->ls_slots_size = 0;
ls->ls_slots = NULL;
INIT_LIST_HEAD(&ls->ls_recover_list);
spin_lock_init(&ls->ls_recover_list_lock);
idr_init(&ls->ls_recover_idr);
spin_lock_init(&ls->ls_recover_idr_lock);
ls->ls_recover_list_count = 0;
ls->ls_local_handle = ls;
init_waitqueue_head(&ls->ls_wait_general);
INIT_LIST_HEAD(&ls->ls_root_list);
init_rwsem(&ls->ls_root_sem);
spin_lock(&lslist_lock);
ls->ls_create_count = 1;
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
if (flags & DLM_LSFL_FS) {
error = dlm_callback_start(ls);
if (error) {
log_error(ls, "can't start dlm_callback %d", error);
goto out_delist;
}
}
init_waitqueue_head(&ls->ls_recover_lock_wait);
/*
* Once started, dlm_recoverd first looks for ls in lslist, then
* initializes ls_in_recovery as locked in "down" mode. We need
* to wait for the wakeup from dlm_recoverd because in_recovery
* has to start out in down mode.
*/
error = dlm_recoverd_start(ls);
if (error) {
log_error(ls, "can't start dlm_recoverd %d", error);
goto out_callback;
}
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
if (error)
goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
/* let kobject handle freeing of ls if there's an error */
do_unreg = 1;
/* This uevent triggers dlm_controld in userspace to add us to the
group of nodes that are members of this lockspace (managed by the
cluster infrastructure.) Once it's done that, it tells us who the
current lockspace members are (via configfs) and then tells the
lockspace to start running (via sysfs) in dlm_ls_start(). */
error = do_uevent(ls, 1);
if (error)
goto out_recoverd;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
if (error)
goto out_members;
dlm_create_debug_file(ls);
log_rinfo(ls, "join complete");
*lockspace = ls;
return 0;
out_members:
do_uevent(ls, 0);
dlm_clear_members(ls);
kfree(ls->ls_node_array);
out_recoverd:
dlm_recoverd_stop(ls);
out_callback:
dlm_callback_stop(ls);
out_delist:
spin_lock(&lslist_lock);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
idr_destroy(&ls->ls_recover_idr);
kfree(ls->ls_recover_buf);
out_lkbidr:
idr_destroy(&ls->ls_lkbidr);
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
if (ls->ls_remove_names[i])
kfree(ls->ls_remove_names[i]);
}
out_rsbtbl:
vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
kobject_put(&ls->ls_kobj);
else
kfree(ls);
out:
module_put(THIS_MODULE);
return error;
}
int dlm_new_lockspace(const char *name, const char *cluster,
uint32_t flags, int lvblen,
const struct dlm_lockspace_ops *ops, void *ops_arg,
int *ops_result, dlm_lockspace_t **lockspace)
{
int error = 0;
mutex_lock(&ls_lock);
if (!ls_count)
error = threads_start();
if (error)
goto out;
error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
ops_result, lockspace);
if (!error)
ls_count++;
if (error > 0)
error = 0;
if (!ls_count)
threads_stop();
out:
mutex_unlock(&ls_lock);
return error;
}
static int lkb_idr_is_local(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
}
static int lkb_idr_is_any(int id, void *p, void *data)
{
return 1;
}
static int lkb_idr_free(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
dlm_free_lvb(lkb->lkb_lvbptr);
dlm_free_lkb(lkb);
return 0;
}
/* NOTE: We check the lkbidr here rather than the resource table.
This is because there may be LKBs queued as ASTs that have been unlinked
from their RSBs and are pending deletion once the AST has been delivered */
static int lockspace_busy(struct dlm_ls *ls, int force)
{
int rv;
spin_lock(&ls->ls_lkbidr_spin);
if (force == 0) {
rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
} else if (force == 1) {
rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
} else {
rv = 0;
}
spin_unlock(&ls->ls_lkbidr_spin);
return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
struct dlm_rsb *rsb;
struct rb_node *n;
int i, busy, rv;
busy = lockspace_busy(ls, force);
spin_lock(&lslist_lock);
if (ls->ls_create_count == 1) {
if (busy) {
rv = -EBUSY;
} else {
/* remove_lockspace takes ls off lslist */
ls->ls_create_count = 0;
rv = 0;
}
} else if (ls->ls_create_count > 1) {
rv = --ls->ls_create_count;
} else {
rv = -EINVAL;
}
spin_unlock(&lslist_lock);
if (rv) {
log_debug(ls, "release_lockspace no remove %d", rv);
return rv;
}
dlm_device_deregister(ls);
if (force < 3 && dlm_user_daemon_available())
do_uevent(ls, 0);
dlm_recoverd_stop(ls);
dlm_callback_stop(ls);
remove_lockspace(ls);
dlm_delete_debug_file(ls);
kfree(ls->ls_recover_buf);
/*
* Free all lkb's in idr
*/
idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
idr_destroy(&ls->ls_lkbidr);
/*
* Free all rsb's on rsbtbl[] lists
*/
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
rb_erase(n, &ls->ls_rsbtbl[i].keep);
dlm_free_rsb(rsb);
}
while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
rb_erase(n, &ls->ls_rsbtbl[i].toss);
dlm_free_rsb(rsb);
}
}
vfree(ls->ls_rsbtbl);
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
kfree(ls->ls_remove_names[i]);
while (!list_empty(&ls->ls_new_rsb)) {
rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
res_hashchain);
list_del(&rsb->res_hashchain);
dlm_free_rsb(rsb);
}
/*
* Free structures on any other lists
*/
dlm_purge_requestqueue(ls);
kfree(ls->ls_recover_args);
dlm_clear_members(ls);
dlm_clear_members_gone(ls);
kfree(ls->ls_node_array);
log_rinfo(ls, "release_lockspace final free");
kobject_put(&ls->ls_kobj);
/* The ls structure will be freed when the kobject is done with */
module_put(THIS_MODULE);
return 0;
}
/*
* Called when a system has released all its locks and is not going to use the
* lockspace any longer. We free everything we're managing for this lockspace.
* Remaining nodes will go through the recovery process as if we'd died. The
* lockspace must continue to function as usual, participating in recoveries,
* until this returns.
*
* Force has 4 possible values:
* 0 - don't destroy locksapce if it has any LKBs
* 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
* 2 - destroy lockspace regardless of LKBs
* 3 - destroy lockspace as part of a forced shutdown
*/
int dlm_release_lockspace(void *lockspace, int force)
{
struct dlm_ls *ls;
int error;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
dlm_put_lockspace(ls);
mutex_lock(&ls_lock);
error = release_lockspace(ls, force);
if (!error)
ls_count--;
if (!ls_count)
threads_stop();
mutex_unlock(&ls_lock);
return error;
}
void dlm_stop_lockspaces(void)
{
struct dlm_ls *ls;
int count;
restart:
count = 0;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) {
count++;
continue;
}
spin_unlock(&lslist_lock);
log_error(ls, "no userland control daemon, stopping lockspace");
dlm_ls_stop(ls);
goto restart;
}
spin_unlock(&lslist_lock);
if (count)
log_print("dlm user daemon left %d lockspaces", count);
}

26
fs/dlm/lockspace.h Normal file
View file

@ -0,0 +1,26 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOCKSPACE_DOT_H__
#define __LOCKSPACE_DOT_H__
int dlm_lockspace_init(void);
void dlm_lockspace_exit(void);
struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
void dlm_stop_lockspaces(void);
#endif /* __LOCKSPACE_DOT_H__ */

1830
fs/dlm/lowcomms.c Normal file

File diff suppressed because it is too large Load diff

27
fs/dlm/lowcomms.h Normal file
View file

@ -0,0 +1,27 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOWCOMMS_DOT_H__
#define __LOWCOMMS_DOT_H__
int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void);
void dlm_lowcomms_exit(void);
int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);
int dlm_lowcomms_connect_node(int nodeid);
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
#endif /* __LOWCOMMS_DOT_H__ */

18
fs/dlm/lvb_table.h Normal file
View file

@ -0,0 +1,18 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LVB_TABLE_DOT_H__
#define __LVB_TABLE_DOT_H__
extern const int dlm_lvb_operations[8][8];
#endif

97
fs/dlm/main.c Normal file
View file

@ -0,0 +1,97 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
#include "user.h"
#include "memory.h"
#include "config.h"
#include "lowcomms.h"
static int __init init_dlm(void)
{
int error;
error = dlm_memory_init();
if (error)
goto out;
error = dlm_lockspace_init();
if (error)
goto out_mem;
error = dlm_config_init();
if (error)
goto out_lockspace;
error = dlm_register_debugfs();
if (error)
goto out_config;
error = dlm_user_init();
if (error)
goto out_debug;
error = dlm_netlink_init();
if (error)
goto out_user;
error = dlm_plock_init();
if (error)
goto out_netlink;
printk("DLM installed\n");
return 0;
out_netlink:
dlm_netlink_exit();
out_user:
dlm_user_exit();
out_debug:
dlm_unregister_debugfs();
out_config:
dlm_config_exit();
out_lockspace:
dlm_lockspace_exit();
out_mem:
dlm_memory_exit();
out:
return error;
}
static void __exit exit_dlm(void)
{
dlm_plock_exit();
dlm_netlink_exit();
dlm_user_exit();
dlm_config_exit();
dlm_memory_exit();
dlm_lockspace_exit();
dlm_lowcomms_exit();
dlm_unregister_debugfs();
}
module_init(init_dlm);
module_exit(exit_dlm);
MODULE_DESCRIPTION("Distributed Lock Manager");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
EXPORT_SYMBOL_GPL(dlm_new_lockspace);
EXPORT_SYMBOL_GPL(dlm_release_lockspace);
EXPORT_SYMBOL_GPL(dlm_lock);
EXPORT_SYMBOL_GPL(dlm_unlock);

722
fs/dlm/member.c Normal file
View file

@ -0,0 +1,722 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "recover.h"
#include "rcom.h"
#include "config.h"
#include "lowcomms.h"
int dlm_slots_version(struct dlm_header *h)
{
if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS)
return 0;
return 1;
}
void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
struct dlm_member *memb)
{
struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
if (!dlm_slots_version(&rc->rc_header))
return;
memb->slot = le16_to_cpu(rf->rf_our_slot);
memb->generation = le32_to_cpu(rf->rf_generation);
}
void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc)
{
struct dlm_slot *slot;
struct rcom_slot *ro;
int i;
ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config));
/* ls_slots array is sparse, but not rcom_slots */
for (i = 0; i < ls->ls_slots_size; i++) {
slot = &ls->ls_slots[i];
if (!slot->nodeid)
continue;
ro->ro_nodeid = cpu_to_le32(slot->nodeid);
ro->ro_slot = cpu_to_le16(slot->slot);
ro++;
}
}
#define SLOT_DEBUG_LINE 128
static void log_slots(struct dlm_ls *ls, uint32_t gen, int num_slots,
struct rcom_slot *ro0, struct dlm_slot *array,
int array_size)
{
char line[SLOT_DEBUG_LINE];
int len = SLOT_DEBUG_LINE - 1;
int pos = 0;
int ret, i;
memset(line, 0, sizeof(line));
if (array) {
for (i = 0; i < array_size; i++) {
if (!array[i].nodeid)
continue;
ret = snprintf(line + pos, len - pos, " %d:%d",
array[i].slot, array[i].nodeid);
if (ret >= len - pos)
break;
pos += ret;
}
} else if (ro0) {
for (i = 0; i < num_slots; i++) {
ret = snprintf(line + pos, len - pos, " %d:%d",
ro0[i].ro_slot, ro0[i].ro_nodeid);
if (ret >= len - pos)
break;
pos += ret;
}
}
log_rinfo(ls, "generation %u slots %d%s", gen, num_slots, line);
}
int dlm_slots_copy_in(struct dlm_ls *ls)
{
struct dlm_member *memb;
struct dlm_rcom *rc = ls->ls_recover_buf;
struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
struct rcom_slot *ro0, *ro;
int our_nodeid = dlm_our_nodeid();
int i, num_slots;
uint32_t gen;
if (!dlm_slots_version(&rc->rc_header))
return -1;
gen = le32_to_cpu(rf->rf_generation);
if (gen <= ls->ls_generation) {
log_error(ls, "dlm_slots_copy_in gen %u old %u",
gen, ls->ls_generation);
}
ls->ls_generation = gen;
num_slots = le16_to_cpu(rf->rf_num_slots);
if (!num_slots)
return -1;
ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config));
for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid);
ro->ro_slot = le16_to_cpu(ro->ro_slot);
}
log_slots(ls, gen, num_slots, ro0, NULL, 0);
list_for_each_entry(memb, &ls->ls_nodes, list) {
for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
if (ro->ro_nodeid != memb->nodeid)
continue;
memb->slot = ro->ro_slot;
memb->slot_prev = memb->slot;
break;
}
if (memb->nodeid == our_nodeid) {
if (ls->ls_slot && ls->ls_slot != memb->slot) {
log_error(ls, "dlm_slots_copy_in our slot "
"changed %d %d", ls->ls_slot,
memb->slot);
return -1;
}
if (!ls->ls_slot)
ls->ls_slot = memb->slot;
}
if (!memb->slot) {
log_error(ls, "dlm_slots_copy_in nodeid %d no slot",
memb->nodeid);
return -1;
}
}
return 0;
}
/* for any nodes that do not support slots, we will not have set memb->slot
in wait_status_all(), so memb->slot will remain -1, and we will not
assign slots or set ls_num_slots here */
int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
struct dlm_slot **slots_out, uint32_t *gen_out)
{
struct dlm_member *memb;
struct dlm_slot *array;
int our_nodeid = dlm_our_nodeid();
int array_size, max_slots, i;
int need = 0;
int max = 0;
int num = 0;
uint32_t gen = 0;
/* our own memb struct will have slot -1 gen 0 */
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->nodeid == our_nodeid) {
memb->slot = ls->ls_slot;
memb->generation = ls->ls_generation;
break;
}
}
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->generation > gen)
gen = memb->generation;
/* node doesn't support slots */
if (memb->slot == -1)
return -1;
/* node needs a slot assigned */
if (!memb->slot)
need++;
/* node has a slot assigned */
num++;
if (!max || max < memb->slot)
max = memb->slot;
/* sanity check, once slot is assigned it shouldn't change */
if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) {
log_error(ls, "nodeid %d slot changed %d %d",
memb->nodeid, memb->slot_prev, memb->slot);
return -1;
}
memb->slot_prev = memb->slot;
}
array_size = max + need;
array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS);
if (!array)
return -ENOMEM;
num = 0;
/* fill in slots (offsets) that are used */
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (!memb->slot)
continue;
if (memb->slot > array_size) {
log_error(ls, "invalid slot number %d", memb->slot);
kfree(array);
return -1;
}
array[memb->slot - 1].nodeid = memb->nodeid;
array[memb->slot - 1].slot = memb->slot;
num++;
}
/* assign new slots from unused offsets */
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->slot)
continue;
for (i = 0; i < array_size; i++) {
if (array[i].nodeid)
continue;
memb->slot = i + 1;
memb->slot_prev = memb->slot;
array[i].nodeid = memb->nodeid;
array[i].slot = memb->slot;
num++;
if (!ls->ls_slot && memb->nodeid == our_nodeid)
ls->ls_slot = memb->slot;
break;
}
if (!memb->slot) {
log_error(ls, "no free slot found");
kfree(array);
return -1;
}
}
gen++;
log_slots(ls, gen, num, NULL, array, array_size);
max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) -
sizeof(struct rcom_config)) / sizeof(struct rcom_slot);
if (num > max_slots) {
log_error(ls, "num_slots %d exceeds max_slots %d",
num, max_slots);
kfree(array);
return -1;
}
*gen_out = gen;
*slots_out = array;
*slots_size = array_size;
*num_slots = num;
return 0;
}
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
struct dlm_member *memb = NULL;
struct list_head *tmp;
struct list_head *newlist = &new->list;
struct list_head *head = &ls->ls_nodes;
list_for_each(tmp, head) {
memb = list_entry(tmp, struct dlm_member, list);
if (new->nodeid < memb->nodeid)
break;
}
if (!memb)
list_add_tail(newlist, head);
else {
/* FIXME: can use list macro here */
newlist->prev = tmp->prev;
newlist->next = tmp;
tmp->prev->next = newlist;
tmp->prev = newlist;
}
}
static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
{
struct dlm_member *memb;
int error;
memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
if (!memb)
return -ENOMEM;
error = dlm_lowcomms_connect_node(node->nodeid);
if (error < 0) {
kfree(memb);
return error;
}
memb->nodeid = node->nodeid;
memb->weight = node->weight;
memb->comm_seq = node->comm_seq;
add_ordered_member(ls, memb);
ls->ls_num_nodes++;
return 0;
}
static struct dlm_member *find_memb(struct list_head *head, int nodeid)
{
struct dlm_member *memb;
list_for_each_entry(memb, head, list) {
if (memb->nodeid == nodeid)
return memb;
}
return NULL;
}
int dlm_is_member(struct dlm_ls *ls, int nodeid)
{
if (find_memb(&ls->ls_nodes, nodeid))
return 1;
return 0;
}
int dlm_is_removed(struct dlm_ls *ls, int nodeid)
{
if (find_memb(&ls->ls_nodes_gone, nodeid))
return 1;
return 0;
}
static void clear_memb_list(struct list_head *head)
{
struct dlm_member *memb;
while (!list_empty(head)) {
memb = list_entry(head->next, struct dlm_member, list);
list_del(&memb->list);
kfree(memb);
}
}
void dlm_clear_members(struct dlm_ls *ls)
{
clear_memb_list(&ls->ls_nodes);
ls->ls_num_nodes = 0;
}
void dlm_clear_members_gone(struct dlm_ls *ls)
{
clear_memb_list(&ls->ls_nodes_gone);
}
static void make_member_array(struct dlm_ls *ls)
{
struct dlm_member *memb;
int i, w, x = 0, total = 0, all_zero = 0, *array;
kfree(ls->ls_node_array);
ls->ls_node_array = NULL;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->weight)
total += memb->weight;
}
/* all nodes revert to weight of 1 if all have weight 0 */
if (!total) {
total = ls->ls_num_nodes;
all_zero = 1;
}
ls->ls_total_weight = total;
array = kmalloc(sizeof(int) * total, GFP_NOFS);
if (!array)
return;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (!all_zero && !memb->weight)
continue;
if (all_zero)
w = 1;
else
w = memb->weight;
DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
for (i = 0; i < w; i++)
array[x++] = memb->nodeid;
}
ls->ls_node_array = array;
}
/* send a status request to all members just to establish comms connections */
static int ping_members(struct dlm_ls *ls)
{
struct dlm_member *memb;
int error = 0;
list_for_each_entry(memb, &ls->ls_nodes, list) {
error = dlm_recovery_stopped(ls);
if (error)
break;
error = dlm_rcom_status(ls, memb->nodeid, 0);
if (error)
break;
}
if (error)
log_rinfo(ls, "ping_members aborted %d last nodeid %d",
error, ls->ls_recover_nodeid);
return error;
}
static void dlm_lsop_recover_prep(struct dlm_ls *ls)
{
if (!ls->ls_ops || !ls->ls_ops->recover_prep)
return;
ls->ls_ops->recover_prep(ls->ls_ops_arg);
}
static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
{
struct dlm_slot slot;
uint32_t seq;
int error;
if (!ls->ls_ops || !ls->ls_ops->recover_slot)
return;
/* if there is no comms connection with this node
or the present comms connection is newer
than the one when this member was added, then
we consider the node to have failed (versus
being removed due to dlm_release_lockspace) */
error = dlm_comm_seq(memb->nodeid, &seq);
if (!error && seq == memb->comm_seq)
return;
slot.nodeid = memb->nodeid;
slot.slot = memb->slot;
ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
}
void dlm_lsop_recover_done(struct dlm_ls *ls)
{
struct dlm_member *memb;
struct dlm_slot *slots;
int i, num;
if (!ls->ls_ops || !ls->ls_ops->recover_done)
return;
num = ls->ls_num_nodes;
slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
if (!slots)
return;
i = 0;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (i == num) {
log_error(ls, "dlm_lsop_recover_done bad num %d", num);
goto out;
}
slots[i].nodeid = memb->nodeid;
slots[i].slot = memb->slot;
i++;
}
ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
ls->ls_slot, ls->ls_generation);
out:
kfree(slots);
}
static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
int nodeid)
{
int i;
for (i = 0; i < rv->nodes_count; i++) {
if (rv->nodes[i].nodeid == nodeid)
return &rv->nodes[i];
}
return NULL;
}
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
struct dlm_member *memb, *safe;
struct dlm_config_node *node;
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
neg++;
}
/* move departed members from ls_nodes to ls_nodes_gone */
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
node = find_config_node(rv, memb->nodeid);
if (node && !node->new)
continue;
if (!node) {
log_rinfo(ls, "remove member %d", memb->nodeid);
} else {
/* removed and re-added */
log_rinfo(ls, "remove member %d comm_seq %u %u",
memb->nodeid, memb->comm_seq, node->comm_seq);
}
neg++;
list_move(&memb->list, &ls->ls_nodes_gone);
ls->ls_num_nodes--;
dlm_lsop_recover_slot(ls, memb);
}
/* add new members to ls_nodes */
for (i = 0; i < rv->nodes_count; i++) {
node = &rv->nodes[i];
if (dlm_is_member(ls, node->nodeid))
continue;
dlm_add_member(ls, node);
log_rinfo(ls, "add member %d", node->nodeid);
}
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (low == -1 || memb->nodeid < low)
low = memb->nodeid;
}
ls->ls_low_nodeid = low;
make_member_array(ls);
*neg_out = neg;
error = ping_members(ls);
if (!error || error == -EPROTO) {
/* new_lockspace() may be waiting to know if the config
is good or bad */
ls->ls_members_result = error;
complete(&ls->ls_members_done);
}
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
dlm_ls_start() is called on any of them to start the new recovery. */
int dlm_ls_stop(struct dlm_ls *ls)
{
int new;
/*
* Prevent dlm_recv from being in the middle of something when we do
* the stop. This includes ensuring dlm_recv isn't processing a
* recovery message (rcom), while dlm_recoverd is aborting and
* resetting things from an in-progress recovery. i.e. we want
* dlm_recoverd to abort its recovery without worrying about dlm_recv
* processing an rcom at the same time. Stopping dlm_recv also makes
* it easy for dlm_receive_message() to check locking stopped and add a
* message to the requestqueue without races.
*/
down_write(&ls->ls_recv_active);
/*
* Abort any recovery that's in progress (see RECOVER_STOP,
* dlm_recovery_stopped()) and tell any other threads running in the
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
*/
spin_lock(&ls->ls_recover_lock);
set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
ls->ls_recover_seq++;
spin_unlock(&ls->ls_recover_lock);
/*
* Let dlm_recv run again, now any normal messages will be saved on the
* requestqueue for later.
*/
up_write(&ls->ls_recv_active);
/*
* This in_recovery lock does two things:
* 1) Keeps this function from returning until all threads are out
* of locking routines and locking is truly stopped.
* 2) Keeps any new requests from being processed until it's unlocked
* when recovery is complete.
*/
if (new) {
set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
}
/*
* The recoverd suspend/resume makes sure that dlm_recoverd (if
* running) has noticed RECOVER_STOP above and quit processing the
* previous recovery.
*/
dlm_recoverd_suspend(ls);
spin_lock(&ls->ls_recover_lock);
kfree(ls->ls_slots);
ls->ls_slots = NULL;
ls->ls_num_slots = 0;
ls->ls_slots_size = 0;
ls->ls_recover_status = 0;
spin_unlock(&ls->ls_recover_lock);
dlm_recoverd_resume(ls);
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
dlm_lsop_recover_prep(ls);
return 0;
}
int dlm_ls_start(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL, *rv_old;
struct dlm_config_node *nodes;
int error, count;
rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
if (!rv)
return -ENOMEM;
error = dlm_config_nodes(ls->ls_name, &nodes, &count);
if (error < 0)
goto fail;
spin_lock(&ls->ls_recover_lock);
/* the lockspace needs to be stopped before it can be started */
if (!dlm_locking_stopped(ls)) {
spin_unlock(&ls->ls_recover_lock);
log_error(ls, "start ignored: lockspace running");
error = -EINVAL;
goto fail;
}
rv->nodes = nodes;
rv->nodes_count = count;
rv->seq = ++ls->ls_recover_seq;
rv_old = ls->ls_recover_args;
ls->ls_recover_args = rv;
spin_unlock(&ls->ls_recover_lock);
if (rv_old) {
log_error(ls, "unused recovery %llx %d",
(unsigned long long)rv_old->seq, rv_old->nodes_count);
kfree(rv_old->nodes);
kfree(rv_old);
}
set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
return 0;
fail:
kfree(rv);
kfree(nodes);
return error;
}

33
fs/dlm/member.h Normal file
View file

@ -0,0 +1,33 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MEMBER_DOT_H__
#define __MEMBER_DOT_H__
int dlm_ls_stop(struct dlm_ls *ls);
int dlm_ls_start(struct dlm_ls *ls);
void dlm_clear_members(struct dlm_ls *ls);
void dlm_clear_members_gone(struct dlm_ls *ls);
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out);
int dlm_is_removed(struct dlm_ls *ls, int nodeid);
int dlm_is_member(struct dlm_ls *ls, int nodeid);
int dlm_slots_version(struct dlm_header *h);
void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
struct dlm_member *memb);
void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_slots_copy_in(struct dlm_ls *ls);
int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
struct dlm_slot **slots_out, uint32_t *gen_out);
void dlm_lsop_recover_done(struct dlm_ls *ls);
#endif /* __MEMBER_DOT_H__ */

96
fs/dlm/memory.c Normal file
View file

@ -0,0 +1,96 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "config.h"
#include "memory.h"
static struct kmem_cache *lkb_cache;
static struct kmem_cache *rsb_cache;
int __init dlm_memory_init(void)
{
lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
__alignof__(struct dlm_lkb), 0, NULL);
if (!lkb_cache)
return -ENOMEM;
rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
__alignof__(struct dlm_rsb), 0, NULL);
if (!rsb_cache) {
kmem_cache_destroy(lkb_cache);
return -ENOMEM;
}
return 0;
}
void dlm_memory_exit(void)
{
if (lkb_cache)
kmem_cache_destroy(lkb_cache);
if (rsb_cache)
kmem_cache_destroy(rsb_cache);
}
char *dlm_allocate_lvb(struct dlm_ls *ls)
{
char *p;
p = kzalloc(ls->ls_lvblen, GFP_NOFS);
return p;
}
void dlm_free_lvb(char *p)
{
kfree(p);
}
struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
{
struct dlm_rsb *r;
r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
return r;
}
void dlm_free_rsb(struct dlm_rsb *r)
{
if (r->res_lvbptr)
dlm_free_lvb(r->res_lvbptr);
kmem_cache_free(rsb_cache, r);
}
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
lkb = kmem_cache_zalloc(lkb_cache, GFP_NOFS);
return lkb;
}
void dlm_free_lkb(struct dlm_lkb *lkb)
{
if (lkb->lkb_flags & DLM_IFL_USER) {
struct dlm_user_args *ua;
ua = lkb->lkb_ua;
if (ua) {
if (ua->lksb.sb_lvbptr)
kfree(ua->lksb.sb_lvbptr);
kfree(ua);
}
}
kmem_cache_free(lkb_cache, lkb);
}

27
fs/dlm/memory.h Normal file
View file

@ -0,0 +1,27 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MEMORY_DOT_H__
#define __MEMORY_DOT_H__
int dlm_memory_init(void);
void dlm_memory_exit(void);
struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
void dlm_free_rsb(struct dlm_rsb *r);
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
void dlm_free_lkb(struct dlm_lkb *l);
char *dlm_allocate_lvb(struct dlm_ls *ls);
void dlm_free_lvb(char *l);
#endif /* __MEMORY_DOT_H__ */

137
fs/dlm/midcomms.c Normal file
View file

@ -0,0 +1,137 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
/*
* midcomms.c
*
* This is the appallingly named "mid-level" comms layer.
*
* Its purpose is to take packets from the "real" comms layer,
* split them up into packets and pass them to the interested
* part of the locking mechanism.
*
* It also takes messages from the locking layer, formats them
* into packets and sends them to the comms layer.
*/
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "lock.h"
#include "midcomms.h"
static void copy_from_cb(void *dst, const void *base, unsigned offset,
unsigned len, unsigned limit)
{
unsigned copy = len;
if ((copy + offset) > limit)
copy = limit - offset;
memcpy(dst, base + offset, copy);
len -= copy;
if (len)
memcpy(dst + copy, base, len);
}
/*
* Called from the low-level comms layer to process a buffer of
* commands.
*
* Only complete messages are processed here, any "spare" bytes from
* the end of a buffer are saved and tacked onto the front of the next
* message that comes in. I doubt this will happen very often but we
* need to be able to cope with it and I don't want the task to be waiting
* for packets to come in when there is useful work to be done.
*/
int dlm_process_incoming_buffer(int nodeid, const void *base,
unsigned offset, unsigned len, unsigned limit)
{
union {
unsigned char __buf[DLM_INBUF_LEN];
/* this is to force proper alignment on some arches */
union dlm_packet p;
} __tmp;
union dlm_packet *p = &__tmp.p;
int ret = 0;
int err = 0;
uint16_t msglen;
uint32_t lockspace;
while (len > sizeof(struct dlm_header)) {
/* Copy just the header to check the total length. The
message may wrap around the end of the buffer back to the
start, so we need to use a temp buffer and copy_from_cb. */
copy_from_cb(p, base, offset, sizeof(struct dlm_header),
limit);
msglen = le16_to_cpu(p->header.h_length);
lockspace = p->header.h_lockspace;
err = -EINVAL;
if (msglen < sizeof(struct dlm_header))
break;
if (p->header.h_cmd == DLM_MSG) {
if (msglen < sizeof(struct dlm_message))
break;
} else {
if (msglen < sizeof(struct dlm_rcom))
break;
}
err = -E2BIG;
if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
}
err = 0;
/* If only part of the full message is contained in this
buffer, then do nothing and wait for lowcomms to call
us again later with more data. We return 0 meaning
we've consumed none of the input buffer. */
if (msglen > len)
break;
/* Allocate a larger temp buffer if the full message won't fit
in the buffer on the stack (which should work for most
ordinary messages). */
if (msglen > sizeof(__tmp) && p == &__tmp.p) {
p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
if (p == NULL)
return ret;
}
copy_from_cb(p, base, offset, msglen, limit);
BUG_ON(lockspace != p->header.h_lockspace);
ret += msglen;
offset += msglen;
offset &= (limit - 1);
len -= msglen;
dlm_receive_buffer(p, nodeid);
}
if (p != &__tmp.p)
kfree(p);
return err ? err : ret;
}

21
fs/dlm/midcomms.h Normal file
View file

@ -0,0 +1,21 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MIDCOMMS_DOT_H__
#define __MIDCOMMS_DOT_H__
int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
unsigned len, unsigned limit);
#endif /* __MIDCOMMS_DOT_H__ */

141
fs/dlm/netlink.c Normal file
View file

@ -0,0 +1,141 @@
/*
* Copyright (C) 2007 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*/
#include <net/genetlink.h>
#include <linux/dlm.h>
#include <linux/dlm_netlink.h>
#include <linux/gfp.h>
#include "dlm_internal.h"
static uint32_t dlm_nl_seqnum;
static uint32_t listener_nlportid;
static struct genl_family family = {
.id = GENL_ID_GENERATE,
.name = DLM_GENL_NAME,
.version = DLM_GENL_VERSION,
};
static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
{
struct sk_buff *skb;
void *data;
skb = genlmsg_new(size, GFP_NOFS);
if (!skb)
return -ENOMEM;
/* add the message headers */
data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
if (!data) {
nlmsg_free(skb);
return -EINVAL;
}
*skbp = skb;
return 0;
}
static struct dlm_lock_data *mk_data(struct sk_buff *skb)
{
struct nlattr *ret;
ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
if (!ret)
return NULL;
return nla_data(ret);
}
static int send_data(struct sk_buff *skb)
{
struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
void *data = genlmsg_data(genlhdr);
int rv;
rv = genlmsg_end(skb, data);
if (rv < 0) {
nlmsg_free(skb);
return rv;
}
return genlmsg_unicast(&init_net, skb, listener_nlportid);
}
static int user_cmd(struct sk_buff *skb, struct genl_info *info)
{
listener_nlportid = info->snd_portid;
printk("user_cmd nlpid %u\n", listener_nlportid);
return 0;
}
static struct genl_ops dlm_nl_ops[] = {
{
.cmd = DLM_CMD_HELLO,
.doit = user_cmd,
},
};
int __init dlm_netlink_init(void)
{
return genl_register_family_with_ops(&family, dlm_nl_ops);
}
void dlm_netlink_exit(void)
{
genl_unregister_family(&family);
}
static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
{
struct dlm_rsb *r = lkb->lkb_resource;
memset(data, 0, sizeof(struct dlm_lock_data));
data->version = DLM_LOCK_DATA_VERSION;
data->nodeid = lkb->lkb_nodeid;
data->ownpid = lkb->lkb_ownpid;
data->id = lkb->lkb_id;
data->remid = lkb->lkb_remid;
data->status = lkb->lkb_status;
data->grmode = lkb->lkb_grmode;
data->rqmode = lkb->lkb_rqmode;
if (lkb->lkb_ua)
data->xid = lkb->lkb_ua->xid;
if (r) {
data->lockspace_id = r->res_ls->ls_global_id;
data->resource_namelen = r->res_length;
memcpy(data->resource_name, r->res_name, r->res_length);
}
}
void dlm_timeout_warn(struct dlm_lkb *lkb)
{
struct sk_buff *uninitialized_var(send_skb);
struct dlm_lock_data *data;
size_t size;
int rv;
size = nla_total_size(sizeof(struct dlm_lock_data)) +
nla_total_size(0); /* why this? */
rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
if (rv < 0)
return;
data = mk_data(send_skb);
if (!data) {
nlmsg_free(send_skb);
return;
}
fill_data(data, lkb);
send_data(send_skb);
}

515
fs/dlm/plock.c Normal file
View file

@ -0,0 +1,515 @@
/*
* Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/slab.h>
#include "dlm_internal.h"
#include "lockspace.h"
static spinlock_t ops_lock;
static struct list_head send_list;
static struct list_head recv_list;
static wait_queue_head_t send_wq;
static wait_queue_head_t recv_wq;
struct plock_op {
struct list_head list;
int done;
struct dlm_plock_info info;
};
struct plock_xop {
struct plock_op xop;
int (*callback)(struct file_lock *fl, int result);
void *fl;
void *file;
struct file_lock flc;
};
static inline void set_version(struct dlm_plock_info *info)
{
info->version[0] = DLM_PLOCK_VERSION_MAJOR;
info->version[1] = DLM_PLOCK_VERSION_MINOR;
info->version[2] = DLM_PLOCK_VERSION_PATCH;
}
static int check_version(struct dlm_plock_info *info)
{
if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
(DLM_PLOCK_VERSION_MINOR < info->version[1])) {
log_print("plock device version mismatch: "
"kernel (%u.%u.%u), user (%u.%u.%u)",
DLM_PLOCK_VERSION_MAJOR,
DLM_PLOCK_VERSION_MINOR,
DLM_PLOCK_VERSION_PATCH,
info->version[0],
info->version[1],
info->version[2]);
return -EINVAL;
}
return 0;
}
static void send_op(struct plock_op *op)
{
set_version(&op->info);
INIT_LIST_HEAD(&op->list);
spin_lock(&ops_lock);
list_add_tail(&op->list, &send_list);
spin_unlock(&ops_lock);
wake_up(&send_wq);
}
/* If a process was killed while waiting for the only plock on a file,
locks_remove_posix will not see any lock on the file so it won't
send an unlock-close to us to pass on to userspace to clean up the
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
static void do_unlock_close(struct dlm_ls *ls, u64 number,
struct file *file, struct file_lock *fl)
{
struct plock_op *op;
op = kzalloc(sizeof(*op), GFP_NOFS);
if (!op)
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
op->info.pid = fl->fl_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
}
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
int cmd, struct file_lock *fl)
{
struct dlm_ls *ls;
struct plock_op *op;
struct plock_xop *xop;
int rv;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
xop = kzalloc(sizeof(*xop), GFP_NOFS);
if (!xop) {
rv = -ENOMEM;
goto out;
}
op = &xop->xop;
op->info.optype = DLM_PLOCK_OP_LOCK;
op->info.pid = fl->fl_pid;
op->info.ex = (fl->fl_type == F_WRLCK);
op->info.wait = IS_SETLKW(cmd);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
/* fl_owner is lockd which doesn't distinguish
processes on the nfs client */
op->info.owner = (__u64) fl->fl_pid;
xop->callback = fl->fl_lmops->lm_grant;
locks_init_lock(&xop->flc);
locks_copy_lock(&xop->flc, fl);
xop->fl = fl;
xop->file = file;
} else {
op->info.owner = (__u64)(long) fl->fl_owner;
xop->callback = NULL;
}
send_op(op);
if (xop->callback == NULL) {
rv = wait_event_killable(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
log_debug(ls, "dlm_posix_lock: wait killed %llx",
(unsigned long long)number);
spin_lock(&ops_lock);
list_del(&op->list);
spin_unlock(&ops_lock);
kfree(xop);
do_unlock_close(ls, number, file, fl);
goto out;
}
} else {
rv = FILE_LOCK_DEFERRED;
goto out;
}
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
log_error(ls, "dlm_posix_lock: op on list %llx",
(unsigned long long)number);
list_del(&op->list);
}
spin_unlock(&ops_lock);
rv = op->info.rv;
if (!rv) {
if (posix_lock_file_wait(file, fl) < 0)
log_error(ls, "dlm_posix_lock: vfs lock error %llx",
(unsigned long long)number);
}
kfree(xop);
out:
dlm_put_lockspace(ls);
return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);
/* Returns failure iff a successful lock operation should be canceled */
static int dlm_plock_callback(struct plock_op *op)
{
struct file *file;
struct file_lock *fl;
struct file_lock *flc;
int (*notify)(struct file_lock *fl, int result) = NULL;
struct plock_xop *xop = (struct plock_xop *)op;
int rv = 0;
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
log_print("dlm_plock_callback: op on list %llx",
(unsigned long long)op->info.number);
list_del(&op->list);
}
spin_unlock(&ops_lock);
/* check if the following 2 are still valid or make a copy */
file = xop->file;
flc = &xop->flc;
fl = xop->fl;
notify = xop->callback;
if (op->info.rv) {
notify(fl, op->info.rv);
goto out;
}
/* got fs lock; bookkeep locally as well: */
flc->fl_flags &= ~FL_SLEEP;
if (posix_lock_file(file, flc, NULL)) {
/*
* This can only happen in the case of kmalloc() failure.
* The filesystem's own lock is the authoritative lock,
* so a failure to get the lock locally is not a disaster.
* As long as the fs cannot reliably cancel locks (especially
* in a low-memory situation), we're better off ignoring
* this failure than trying to recover.
*/
log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
(unsigned long long)op->info.number, file, fl);
}
rv = notify(fl, 0);
if (rv) {
/* XXX: We need to cancel the fs lock here: */
log_print("dlm_plock_callback: lock granted after lock request "
"failed; dangling lock!\n");
goto out;
}
out:
kfree(xop);
return rv;
}
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct file_lock *fl)
{
struct dlm_ls *ls;
struct plock_op *op;
int rv;
unsigned char fl_flags = fl->fl_flags;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
op = kzalloc(sizeof(*op), GFP_NOFS);
if (!op) {
rv = -ENOMEM;
goto out;
}
/* cause the vfs unlock to return ENOENT if lock is not found */
fl->fl_flags |= FL_EXISTS;
rv = posix_lock_file_wait(file, fl);
if (rv == -ENOENT) {
rv = 0;
goto out_free;
}
if (rv < 0) {
log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
rv, (unsigned long long)number);
}
op->info.optype = DLM_PLOCK_OP_UNLOCK;
op->info.pid = fl->fl_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
if (fl->fl_flags & FL_CLOSE) {
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
rv = 0;
goto out;
}
send_op(op);
wait_event(recv_wq, (op->done != 0));
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
log_error(ls, "dlm_posix_unlock: op on list %llx",
(unsigned long long)number);
list_del(&op->list);
}
spin_unlock(&ops_lock);
rv = op->info.rv;
if (rv == -ENOENT)
rv = 0;
out_free:
kfree(op);
out:
dlm_put_lockspace(ls);
fl->fl_flags = fl_flags;
return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct file_lock *fl)
{
struct dlm_ls *ls;
struct plock_op *op;
int rv;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
op = kzalloc(sizeof(*op), GFP_NOFS);
if (!op) {
rv = -ENOMEM;
goto out;
}
op->info.optype = DLM_PLOCK_OP_GET;
op->info.pid = fl->fl_pid;
op->info.ex = (fl->fl_type == F_WRLCK);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
send_op(op);
wait_event(recv_wq, (op->done != 0));
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
log_error(ls, "dlm_posix_get: op on list %llx",
(unsigned long long)number);
list_del(&op->list);
}
spin_unlock(&ops_lock);
/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
-ENOENT if there are no locks on the file */
rv = op->info.rv;
fl->fl_type = F_UNLCK;
if (rv == -ENOENT)
rv = 0;
else if (rv > 0) {
locks_init_lock(fl);
fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
fl->fl_flags = FL_POSIX;
fl->fl_pid = op->info.pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
rv = 0;
}
kfree(op);
out:
dlm_put_lockspace(ls);
return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_get);
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
loff_t *ppos)
{
struct dlm_plock_info info;
struct plock_op *op = NULL;
if (count < sizeof(info))
return -EINVAL;
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_entry(send_list.next, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
list_move(&op->list, &recv_list);
memcpy(&info, &op->info, sizeof(info));
}
spin_unlock(&ops_lock);
if (!op)
return -EAGAIN;
/* there is no need to get a reply from userspace for unlocks
that were generated by the vfs cleaning up for a close
(the process did not make an unlock call). */
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
kfree(op);
if (copy_to_user(u, &info, sizeof(info)))
return -EFAULT;
return sizeof(info);
}
/* a write copies in one plock result that should match a plock_op
on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
loff_t *ppos)
{
struct dlm_plock_info info;
struct plock_op *op;
int found = 0, do_callback = 0;
if (count != sizeof(info))
return -EINVAL;
if (copy_from_user(&info, u, sizeof(info)))
return -EFAULT;
if (check_version(&info))
return -EINVAL;
spin_lock(&ops_lock);
list_for_each_entry(op, &recv_list, list) {
if (op->info.fsid == info.fsid &&
op->info.number == info.number &&
op->info.owner == info.owner) {
struct plock_xop *xop = (struct plock_xop *)op;
list_del_init(&op->list);
memcpy(&op->info, &info, sizeof(info));
if (xop->callback)
do_callback = 1;
else
op->done = 1;
found = 1;
break;
}
}
spin_unlock(&ops_lock);
if (found) {
if (do_callback)
dlm_plock_callback(op);
else
wake_up(&recv_wq);
} else
log_print("dev_write no op %x %llx", info.fsid,
(unsigned long long)info.number);
return count;
}
static unsigned int dev_poll(struct file *file, poll_table *wait)
{
unsigned int mask = 0;
poll_wait(file, &send_wq, wait);
spin_lock(&ops_lock);
if (!list_empty(&send_list))
mask = POLLIN | POLLRDNORM;
spin_unlock(&ops_lock);
return mask;
}
static const struct file_operations dev_fops = {
.read = dev_read,
.write = dev_write,
.poll = dev_poll,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
static struct miscdevice plock_dev_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = DLM_PLOCK_MISC_NAME,
.fops = &dev_fops
};
int dlm_plock_init(void)
{
int rv;
spin_lock_init(&ops_lock);
INIT_LIST_HEAD(&send_list);
INIT_LIST_HEAD(&recv_list);
init_waitqueue_head(&send_wq);
init_waitqueue_head(&recv_wq);
rv = misc_register(&plock_dev_misc);
if (rv)
log_print("dlm_plock_init: misc_register failed %d", rv);
return rv;
}
void dlm_plock_exit(void)
{
if (misc_deregister(&plock_dev_misc) < 0)
log_print("dlm_plock_exit: misc_deregister failed");
}

656
fs/dlm/rcom.c Normal file
View file

@ -0,0 +1,656 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "lowcomms.h"
#include "midcomms.h"
#include "rcom.h"
#include "recover.h"
#include "dir.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "util.h"
static int rcom_response(struct dlm_ls *ls)
{
return test_bit(LSFL_RCOM_READY, &ls->ls_flags);
}
static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
char *mb;
int mb_len = sizeof(struct dlm_rcom) + len;
mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
if (!mh) {
log_print("create_rcom to %d type %d len %d ENOBUFS",
to_nodeid, type, len);
return -ENOBUFS;
}
memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
rc->rc_header.h_lockspace = ls->ls_global_id;
rc->rc_header.h_nodeid = dlm_our_nodeid();
rc->rc_header.h_length = mb_len;
rc->rc_header.h_cmd = DLM_RCOM;
rc->rc_type = type;
spin_lock(&ls->ls_recover_lock);
rc->rc_seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
*mh_ret = mh;
*rc_ret = rc;
return 0;
}
static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
struct dlm_rcom *rc)
{
dlm_rcom_out(rc);
dlm_lowcomms_commit_buffer(mh);
}
static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
uint32_t flags)
{
rs->rs_flags = cpu_to_le32(flags);
}
/* When replying to a status request, a node also sends back its
configuration values. The requesting node then checks that the remote
node is configured the same way as itself. */
static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf,
uint32_t num_slots)
{
rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen);
rf->rf_lsflags = cpu_to_le32(ls->ls_exflags);
rf->rf_our_slot = cpu_to_le16(ls->ls_slot);
rf->rf_num_slots = cpu_to_le16(num_slots);
rf->rf_generation = cpu_to_le32(ls->ls_generation);
}
static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
rc->rc_header.h_version);
return -EPROTO;
}
if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen ||
le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
ls->ls_lvblen, ls->ls_exflags, nodeid,
le32_to_cpu(rf->rf_lvblen),
le32_to_cpu(rf->rf_lsflags));
return -EPROTO;
}
return 0;
}
static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
{
spin_lock(&ls->ls_rcom_spin);
*new_seq = ++ls->ls_rcom_seq;
set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
spin_unlock(&ls->ls_rcom_spin);
}
static void disallow_sync_reply(struct dlm_ls *ls)
{
spin_lock(&ls->ls_rcom_spin);
clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
spin_unlock(&ls->ls_rcom_spin);
}
/*
* low nodeid gathers one slot value at a time from each node.
* it sets need_slots=0, and saves rf_our_slot returned from each
* rcom_config.
*
* other nodes gather all slot values at once from the low nodeid.
* they set need_slots=1, and ignore the rf_our_slot returned from each
* rcom_config. they use the rf_num_slots returned from the low
* node's rcom_config.
*/
int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error = 0;
ls->ls_recover_nodeid = nodeid;
if (nodeid == dlm_our_nodeid()) {
rc = ls->ls_recover_buf;
rc->rc_result = dlm_recover_status(ls);
goto out;
}
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
sizeof(struct rcom_status), &rc, &mh);
if (error)
goto out;
set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags);
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
if (error)
goto out;
rc = ls->ls_recover_buf;
if (rc->rc_result == -ESRCH) {
/* we pretend the remote lockspace exists with 0 status */
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
error = 0;
} else {
error = check_rcom_config(ls, rc, nodeid);
}
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
}
static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct rcom_status *rs;
uint32_t status;
int nodeid = rc_in->rc_header.h_nodeid;
int len = sizeof(struct rcom_config);
int num_slots = 0;
int error;
if (!dlm_slots_version(&rc_in->rc_header)) {
status = dlm_recover_status(ls);
goto do_create;
}
rs = (struct rcom_status *)rc_in->rc_buf;
if (!(le32_to_cpu(rs->rs_flags) & DLM_RSF_NEED_SLOTS)) {
status = dlm_recover_status(ls);
goto do_create;
}
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
num_slots = ls->ls_num_slots;
spin_unlock(&ls->ls_recover_lock);
len += num_slots * sizeof(struct rcom_slot);
do_create:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
len, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = status;
set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots);
if (!num_slots)
goto do_send;
spin_lock(&ls->ls_recover_lock);
if (ls->ls_num_slots != num_slots) {
spin_unlock(&ls->ls_recover_lock);
log_debug(ls, "receive_rcom_status num_slots %d to %d",
num_slots, ls->ls_num_slots);
rc->rc_result = 0;
set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0);
goto do_send;
}
dlm_slots_copy_out(ls, rc);
spin_unlock(&ls->ls_recover_lock);
do_send:
send_rcom(ls, mh, rc);
}
static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
spin_lock(&ls->ls_rcom_spin);
if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
rc_in->rc_id != ls->ls_rcom_seq) {
log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
rc_in->rc_type, rc_in->rc_header.h_nodeid,
(unsigned long long)rc_in->rc_id,
(unsigned long long)ls->ls_rcom_seq);
goto out;
}
memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
set_bit(LSFL_RCOM_READY, &ls->ls_flags);
clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
wake_up(&ls->ls_wait_general);
out:
spin_unlock(&ls->ls_rcom_spin);
}
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error = 0;
ls->ls_recover_nodeid = nodeid;
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
out:
return error;
}
static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
send_rcom(ls, mh, rc);
}
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct dlm_ls *ls = r->res_ls;
int error;
error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length,
&rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
rc->rc_id = (unsigned long) r->res_id;
send_rcom(ls, mh, rc);
out:
return error;
}
int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct dlm_ls *ls = r->res_ls;
int error;
error = create_rcom(ls, to_nodeid, DLM_RCOM_LOOKUP, r->res_length,
&rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
rc->rc_id = 0xFFFFFFFF;
send_rcom(ls, mh, rc);
out:
return error;
}
static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
if (error)
return;
if (rc_in->rc_id == 0xFFFFFFFF) {
log_error(ls, "receive_rcom_lookup dump from %d", nodeid);
dlm_dump_rsb_name(ls, rc_in->rc_buf, len);
return;
}
error = dlm_master_lookup(ls, nodeid, rc_in->rc_buf, len,
DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL);
if (error)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
dlm_recover_master_reply(ls, rc_in);
}
static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct rcom_lock *rl)
{
memset(rl, 0, sizeof(*rl));
rl->rl_ownpid = cpu_to_le32(lkb->lkb_ownpid);
rl->rl_lkid = cpu_to_le32(lkb->lkb_id);
rl->rl_exflags = cpu_to_le32(lkb->lkb_exflags);
rl->rl_flags = cpu_to_le32(lkb->lkb_flags);
rl->rl_lvbseq = cpu_to_le32(lkb->lkb_lvbseq);
rl->rl_rqmode = lkb->lkb_rqmode;
rl->rl_grmode = lkb->lkb_grmode;
rl->rl_status = lkb->lkb_status;
rl->rl_wait_type = cpu_to_le16(lkb->lkb_wait_type);
if (lkb->lkb_bastfn)
rl->rl_asts |= DLM_CB_BAST;
if (lkb->lkb_astfn)
rl->rl_asts |= DLM_CB_CAST;
rl->rl_namelen = cpu_to_le16(r->res_length);
memcpy(rl->rl_name, r->res_name, r->res_length);
/* FIXME: might we have an lvb without DLM_LKF_VALBLK set ?
If so, receive_rcom_lock_args() won't take this copy. */
if (lkb->lkb_lvbptr)
memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
}
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
struct dlm_ls *ls = r->res_ls;
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct rcom_lock *rl;
int error, len = sizeof(struct rcom_lock);
if (lkb->lkb_lvbptr)
len += ls->ls_lvblen;
error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh);
if (error)
goto out;
rl = (struct rcom_lock *) rc->rc_buf;
pack_rcom_lock(r, lkb, rl);
rc->rc_id = (unsigned long) r;
send_rcom(ls, mh, rc);
out:
return error;
}
/* needs at least dlm_rcom + rcom_lock */
static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, nodeid = rc_in->rc_header.h_nodeid;
dlm_recover_master_copy(ls, rc_in);
error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY,
sizeof(struct rcom_lock), &rc, &mh);
if (error)
return;
/* We send back the same rcom_lock struct we received, but
dlm_recover_master_copy() has filled in rl_remid and rl_result */
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct rcom_config *rf;
struct dlm_mhandle *mh;
char *mb;
int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
if (!mh)
return -ENOBUFS;
memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace;
rc->rc_header.h_nodeid = dlm_our_nodeid();
rc->rc_header.h_length = mb_len;
rc->rc_header.h_cmd = DLM_RCOM;
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
rf->rf_lvblen = cpu_to_le32(~0U);
dlm_rcom_out(rc);
dlm_lowcomms_commit_buffer(mh);
return 0;
}
/*
* Ignore messages for stage Y before we set
* recover_status bit for stage X:
*
* recover_status = 0
*
* dlm_recover_members()
* - send nothing
* - recv nothing
* - ignore NAMES, NAMES_REPLY
* - ignore LOOKUP, LOOKUP_REPLY
* - ignore LOCK, LOCK_REPLY
*
* recover_status |= NODES
*
* dlm_recover_members_wait()
*
* dlm_recover_directory()
* - send NAMES
* - recv NAMES_REPLY
* - ignore LOOKUP, LOOKUP_REPLY
* - ignore LOCK, LOCK_REPLY
*
* recover_status |= DIR
*
* dlm_recover_directory_wait()
*
* dlm_recover_masters()
* - send LOOKUP
* - recv LOOKUP_REPLY
*
* dlm_recover_locks()
* - send LOCKS
* - recv LOCKS_REPLY
*
* recover_status |= LOCKS
*
* dlm_recover_locks_wait()
*
* recover_status |= DONE
*/
/* Called by dlm_recv; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock);
int stop, reply = 0, names = 0, lookup = 0, lock = 0;
uint32_t status;
uint64_t seq;
switch (rc->rc_type) {
case DLM_RCOM_STATUS_REPLY:
reply = 1;
break;
case DLM_RCOM_NAMES:
names = 1;
break;
case DLM_RCOM_NAMES_REPLY:
names = 1;
reply = 1;
break;
case DLM_RCOM_LOOKUP:
lookup = 1;
break;
case DLM_RCOM_LOOKUP_REPLY:
lookup = 1;
reply = 1;
break;
case DLM_RCOM_LOCK:
lock = 1;
break;
case DLM_RCOM_LOCK_REPLY:
lock = 1;
reply = 1;
break;
};
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
if (stop && (rc->rc_type != DLM_RCOM_STATUS))
goto ignore;
if (reply && (rc->rc_seq_reply != seq))
goto ignore;
if (!(status & DLM_RS_NODES) && (names || lookup || lock))
goto ignore;
if (!(status & DLM_RS_DIR) && (lookup || lock))
goto ignore;
switch (rc->rc_type) {
case DLM_RCOM_STATUS:
receive_rcom_status(ls, rc);
break;
case DLM_RCOM_NAMES:
receive_rcom_names(ls, rc);
break;
case DLM_RCOM_LOOKUP:
receive_rcom_lookup(ls, rc);
break;
case DLM_RCOM_LOCK:
if (rc->rc_header.h_length < lock_size)
goto Eshort;
receive_rcom_lock(ls, rc);
break;
case DLM_RCOM_STATUS_REPLY:
receive_sync_reply(ls, rc);
break;
case DLM_RCOM_NAMES_REPLY:
receive_sync_reply(ls, rc);
break;
case DLM_RCOM_LOOKUP_REPLY:
receive_rcom_lookup_reply(ls, rc);
break;
case DLM_RCOM_LOCK_REPLY:
if (rc->rc_header.h_length < lock_size)
goto Eshort;
dlm_recover_process_copy(ls, rc);
break;
default:
log_error(ls, "receive_rcom bad type %d", rc->rc_type);
}
return;
ignore:
log_limit(ls, "dlm_receive_rcom ignore msg %d "
"from %d %llu %llu recover seq %llu sts %x gen %u",
rc->rc_type,
nodeid,
(unsigned long long)rc->rc_seq,
(unsigned long long)rc->rc_seq_reply,
(unsigned long long)seq,
status, ls->ls_generation);
return;
Eshort:
log_error(ls, "recovery message %d from %d is too short",
rc->rc_type, nodeid);
}

26
fs/dlm/rcom.h Normal file
View file

@ -0,0 +1,26 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RCOM_DOT_H__
#define __RCOM_DOT_H__
int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
#endif

955
fs/dlm/recover.c Normal file
View file

@ -0,0 +1,955 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "dir.h"
#include "config.h"
#include "ast.h"
#include "memory.h"
#include "rcom.h"
#include "lock.h"
#include "lowcomms.h"
#include "member.h"
#include "recover.h"
/*
* Recovery waiting routines: these functions wait for a particular reply from
* a remote node, or for the remote node to report a certain status. They need
* to abort if the lockspace is stopped indicating a node has failed (perhaps
* the one being waited for).
*/
/*
* Wait until given function returns non-zero or lockspace is stopped
* (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another
* function thinks it could have completed the waited-on task, they should wake
* up ls_wait_general to get an immediate response rather than waiting for the
* timeout. This uses a timeout so it can check periodically if the wait
* should abort due to node failure (which doesn't cause a wake_up).
* This should only be called by the dlm_recoverd thread.
*/
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
int error = 0;
int rv;
while (1) {
rv = wait_event_timeout(ls->ls_wait_general,
testfn(ls) || dlm_recovery_stopped(ls),
dlm_config.ci_recover_timer * HZ);
if (rv)
break;
}
if (dlm_recovery_stopped(ls)) {
log_debug(ls, "dlm_wait_function aborted");
error = -EINTR;
}
return error;
}
/*
* An efficient way for all nodes to wait for all others to have a certain
* status. The node with the lowest nodeid polls all the others for their
* status (wait_status_all) and all the others poll the node with the low id
* for its accumulated result (wait_status_low). When all nodes have set
* status flag X, then status flag X_ALL will be set on the low nodeid.
*/
uint32_t dlm_recover_status(struct dlm_ls *ls)
{
uint32_t status;
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
spin_unlock(&ls->ls_recover_lock);
return status;
}
static void _set_recover_status(struct dlm_ls *ls, uint32_t status)
{
ls->ls_recover_status |= status;
}
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
spin_lock(&ls->ls_recover_lock);
_set_recover_status(ls, status);
spin_unlock(&ls->ls_recover_lock);
}
static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
int save_slots)
{
struct dlm_rcom *rc = ls->ls_recover_buf;
struct dlm_member *memb;
int error = 0, delay;
list_for_each_entry(memb, &ls->ls_nodes, list) {
delay = 0;
for (;;) {
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto out;
}
error = dlm_rcom_status(ls, memb->nodeid, 0);
if (error)
goto out;
if (save_slots)
dlm_slot_save(ls, rc, memb);
if (rc->rc_result & wait_status)
break;
if (delay < 1000)
delay += 20;
msleep(delay);
}
}
out:
return error;
}
static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
uint32_t status_flags)
{
struct dlm_rcom *rc = ls->ls_recover_buf;
int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
for (;;) {
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto out;
}
error = dlm_rcom_status(ls, nodeid, status_flags);
if (error)
break;
if (rc->rc_result & wait_status)
break;
if (delay < 1000)
delay += 20;
msleep(delay);
}
out:
return error;
}
static int wait_status(struct dlm_ls *ls, uint32_t status)
{
uint32_t status_all = status << 1;
int error;
if (ls->ls_low_nodeid == dlm_our_nodeid()) {
error = wait_status_all(ls, status, 0);
if (!error)
dlm_set_recover_status(ls, status_all);
} else
error = wait_status_low(ls, status_all, 0);
return error;
}
int dlm_recover_members_wait(struct dlm_ls *ls)
{
struct dlm_member *memb;
struct dlm_slot *slots;
int num_slots, slots_size;
int error, rv;
uint32_t gen;
list_for_each_entry(memb, &ls->ls_nodes, list) {
memb->slot = -1;
memb->generation = 0;
}
if (ls->ls_low_nodeid == dlm_our_nodeid()) {
error = wait_status_all(ls, DLM_RS_NODES, 1);
if (error)
goto out;
/* slots array is sparse, slots_size may be > num_slots */
rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen);
if (!rv) {
spin_lock(&ls->ls_recover_lock);
_set_recover_status(ls, DLM_RS_NODES_ALL);
ls->ls_num_slots = num_slots;
ls->ls_slots_size = slots_size;
ls->ls_slots = slots;
ls->ls_generation = gen;
spin_unlock(&ls->ls_recover_lock);
} else {
dlm_set_recover_status(ls, DLM_RS_NODES_ALL);
}
} else {
error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS);
if (error)
goto out;
dlm_slots_copy_in(ls);
}
out:
return error;
}
int dlm_recover_directory_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_DIR);
}
int dlm_recover_locks_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_LOCKS);
}
int dlm_recover_done_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_DONE);
}
/*
* The recover_list contains all the rsb's for which we've requested the new
* master nodeid. As replies are returned from the resource directories the
* rsb's are removed from the list. When the list is empty we're done.
*
* The recover_list is later similarly used for all rsb's for which we've sent
* new lkb's and need to receive new corresponding lkid's.
*
* We use the address of the rsb struct as a simple local identifier for the
* rsb so we can match an rcom reply with the rsb it was sent for.
*/
static int recover_list_empty(struct dlm_ls *ls)
{
int empty;
spin_lock(&ls->ls_recover_list_lock);
empty = list_empty(&ls->ls_recover_list);
spin_unlock(&ls->ls_recover_list_lock);
return empty;
}
static void recover_list_add(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
spin_lock(&ls->ls_recover_list_lock);
if (list_empty(&r->res_recover_list)) {
list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
ls->ls_recover_list_count++;
dlm_hold_rsb(r);
}
spin_unlock(&ls->ls_recover_list_lock);
}
static void recover_list_del(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
spin_lock(&ls->ls_recover_list_lock);
list_del_init(&r->res_recover_list);
ls->ls_recover_list_count--;
spin_unlock(&ls->ls_recover_list_lock);
dlm_put_rsb(r);
}
static void recover_list_clear(struct dlm_ls *ls)
{
struct dlm_rsb *r, *s;
spin_lock(&ls->ls_recover_list_lock);
list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
list_del_init(&r->res_recover_list);
r->res_recover_locks_count = 0;
dlm_put_rsb(r);
ls->ls_recover_list_count--;
}
if (ls->ls_recover_list_count != 0) {
log_error(ls, "warning: recover_list_count %d",
ls->ls_recover_list_count);
ls->ls_recover_list_count = 0;
}
spin_unlock(&ls->ls_recover_list_lock);
}
static int recover_idr_empty(struct dlm_ls *ls)
{
int empty = 1;
spin_lock(&ls->ls_recover_idr_lock);
if (ls->ls_recover_list_count)
empty = 0;
spin_unlock(&ls->ls_recover_idr_lock);
return empty;
}
static int recover_idr_add(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
int rv;
idr_preload(GFP_NOFS);
spin_lock(&ls->ls_recover_idr_lock);
if (r->res_id) {
rv = -1;
goto out_unlock;
}
rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
if (rv < 0)
goto out_unlock;
r->res_id = rv;
ls->ls_recover_list_count++;
dlm_hold_rsb(r);
rv = 0;
out_unlock:
spin_unlock(&ls->ls_recover_idr_lock);
idr_preload_end();
return rv;
}
static void recover_idr_del(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
spin_lock(&ls->ls_recover_idr_lock);
idr_remove(&ls->ls_recover_idr, r->res_id);
r->res_id = 0;
ls->ls_recover_list_count--;
spin_unlock(&ls->ls_recover_idr_lock);
dlm_put_rsb(r);
}
static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
{
struct dlm_rsb *r;
spin_lock(&ls->ls_recover_idr_lock);
r = idr_find(&ls->ls_recover_idr, (int)id);
spin_unlock(&ls->ls_recover_idr_lock);
return r;
}
static void recover_idr_clear(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int id;
spin_lock(&ls->ls_recover_idr_lock);
idr_for_each_entry(&ls->ls_recover_idr, r, id) {
idr_remove(&ls->ls_recover_idr, id);
r->res_id = 0;
r->res_recover_locks_count = 0;
ls->ls_recover_list_count--;
dlm_put_rsb(r);
}
if (ls->ls_recover_list_count != 0) {
log_error(ls, "warning: recover_list_count %d",
ls->ls_recover_list_count);
ls->ls_recover_list_count = 0;
}
spin_unlock(&ls->ls_recover_idr_lock);
}
/* Master recovery: find new master node for rsb's that were
mastered on nodes that have been removed.
dlm_recover_masters
recover_master
dlm_send_rcom_lookup -> receive_rcom_lookup
dlm_dir_lookup
receive_rcom_lookup_reply <-
dlm_recover_master_reply
set_new_master
set_master_lkbs
set_lock_master
*/
/*
* Set the lock master for all LKBs in a lock queue
* If we are the new master of the rsb, we may have received new
* MSTCPY locks from other nodes already which we need to ignore
* when setting the new nodeid.
*/
static void set_lock_master(struct list_head *queue, int nodeid)
{
struct dlm_lkb *lkb;
list_for_each_entry(lkb, queue, lkb_statequeue) {
if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) {
lkb->lkb_nodeid = nodeid;
lkb->lkb_remid = 0;
}
}
}
static void set_master_lkbs(struct dlm_rsb *r)
{
set_lock_master(&r->res_grantqueue, r->res_nodeid);
set_lock_master(&r->res_convertqueue, r->res_nodeid);
set_lock_master(&r->res_waitqueue, r->res_nodeid);
}
/*
* Propagate the new master nodeid to locks
* The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
* The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which
* rsb's to consider.
*/
static void set_new_master(struct dlm_rsb *r)
{
set_master_lkbs(r);
rsb_set_flag(r, RSB_NEW_MASTER);
rsb_set_flag(r, RSB_NEW_MASTER2);
}
/*
* We do async lookups on rsb's that need new masters. The rsb's
* waiting for a lookup reply are kept on the recover_list.
*
* Another node recovering the master may have sent us a rcom lookup,
* and our dlm_master_lookup() set it as the new master, along with
* NEW_MASTER so that we'll recover it here (this implies dir_nodeid
* equals our_nodeid below).
*/
static int recover_master(struct dlm_rsb *r, unsigned int *count)
{
struct dlm_ls *ls = r->res_ls;
int our_nodeid, dir_nodeid;
int is_removed = 0;
int error;
if (is_master(r))
return 0;
is_removed = dlm_is_removed(ls, r->res_nodeid);
if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER))
return 0;
our_nodeid = dlm_our_nodeid();
dir_nodeid = dlm_dir_nodeid(r);
if (dir_nodeid == our_nodeid) {
if (is_removed) {
r->res_master_nodeid = our_nodeid;
r->res_nodeid = 0;
}
/* set master of lkbs to ourself when is_removed, or to
another new master which we set along with NEW_MASTER
in dlm_master_lookup */
set_new_master(r);
error = 0;
} else {
recover_idr_add(r);
error = dlm_send_rcom_lookup(r, dir_nodeid);
}
(*count)++;
return error;
}
/*
* All MSTCPY locks are purged and rebuilt, even if the master stayed the same.
* This is necessary because recovery can be started, aborted and restarted,
* causing the master nodeid to briefly change during the aborted recovery, and
* change back to the original value in the second recovery. The MSTCPY locks
* may or may not have been purged during the aborted recovery. Another node
* with an outstanding request in waiters list and a request reply saved in the
* requestqueue, cannot know whether it should ignore the reply and resend the
* request, or accept the reply and complete the request. It must do the
* former if the remote node purged MSTCPY locks, and it must do the later if
* the remote node did not. This is solved by always purging MSTCPY locks, in
* which case, the request reply would always be ignored and the request
* resent.
*/
static int recover_master_static(struct dlm_rsb *r, unsigned int *count)
{
int dir_nodeid = dlm_dir_nodeid(r);
int new_master = dir_nodeid;
if (dir_nodeid == dlm_our_nodeid())
new_master = 0;
dlm_purge_mstcpy_locks(r);
r->res_master_nodeid = dir_nodeid;
r->res_nodeid = new_master;
set_new_master(r);
(*count)++;
return 0;
}
/*
* Go through local root resources and for each rsb which has a master which
* has departed, get the new master nodeid from the directory. The dir will
* assign mastery to the first node to look up the new master. That means
* we'll discover in this lookup if we're the new master of any rsb's.
*
* We fire off all the dir lookup requests individually and asynchronously to
* the correct dir node.
*/
int dlm_recover_masters(struct dlm_ls *ls)
{
struct dlm_rsb *r;
unsigned int total = 0;
unsigned int count = 0;
int nodir = dlm_no_directory(ls);
int error;
log_rinfo(ls, "dlm_recover_masters");
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (dlm_recovery_stopped(ls)) {
up_read(&ls->ls_root_sem);
error = -EINTR;
goto out;
}
lock_rsb(r);
if (nodir)
error = recover_master_static(r, &count);
else
error = recover_master(r, &count);
unlock_rsb(r);
cond_resched();
total++;
if (error) {
up_read(&ls->ls_root_sem);
goto out;
}
}
up_read(&ls->ls_root_sem);
log_rinfo(ls, "dlm_recover_masters %u of %u", count, total);
error = dlm_wait_function(ls, &recover_idr_empty);
out:
if (error)
recover_idr_clear(ls);
return error;
}
int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
struct dlm_rsb *r;
int ret_nodeid, new_master;
r = recover_idr_find(ls, rc->rc_id);
if (!r) {
log_error(ls, "dlm_recover_master_reply no id %llx",
(unsigned long long)rc->rc_id);
goto out;
}
ret_nodeid = rc->rc_result;
if (ret_nodeid == dlm_our_nodeid())
new_master = 0;
else
new_master = ret_nodeid;
lock_rsb(r);
r->res_master_nodeid = ret_nodeid;
r->res_nodeid = new_master;
set_new_master(r);
unlock_rsb(r);
recover_idr_del(r);
if (recover_idr_empty(ls))
wake_up(&ls->ls_wait_general);
out:
return 0;
}
/* Lock recovery: rebuild the process-copy locks we hold on a
remastered rsb on the new rsb master.
dlm_recover_locks
recover_locks
recover_locks_queue
dlm_send_rcom_lock -> receive_rcom_lock
dlm_recover_master_copy
receive_rcom_lock_reply <-
dlm_recover_process_copy
*/
/*
* keep a count of the number of lkb's we send to the new master; when we get
* an equal number of replies then recovery for the rsb is done
*/
static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
{
struct dlm_lkb *lkb;
int error = 0;
list_for_each_entry(lkb, head, lkb_statequeue) {
error = dlm_send_rcom_lock(r, lkb);
if (error)
break;
r->res_recover_locks_count++;
}
return error;
}
static int recover_locks(struct dlm_rsb *r)
{
int error = 0;
lock_rsb(r);
DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););
error = recover_locks_queue(r, &r->res_grantqueue);
if (error)
goto out;
error = recover_locks_queue(r, &r->res_convertqueue);
if (error)
goto out;
error = recover_locks_queue(r, &r->res_waitqueue);
if (error)
goto out;
if (r->res_recover_locks_count)
recover_list_add(r);
else
rsb_clear_flag(r, RSB_NEW_MASTER);
out:
unlock_rsb(r);
return error;
}
int dlm_recover_locks(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int error, count = 0;
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (is_master(r)) {
rsb_clear_flag(r, RSB_NEW_MASTER);
continue;
}
if (!rsb_flag(r, RSB_NEW_MASTER))
continue;
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
up_read(&ls->ls_root_sem);
goto out;
}
error = recover_locks(r);
if (error) {
up_read(&ls->ls_root_sem);
goto out;
}
count += r->res_recover_locks_count;
}
up_read(&ls->ls_root_sem);
log_rinfo(ls, "dlm_recover_locks %d out", count);
error = dlm_wait_function(ls, &recover_list_empty);
out:
if (error)
recover_list_clear(ls);
return error;
}
void dlm_recovered_lock(struct dlm_rsb *r)
{
DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););
r->res_recover_locks_count--;
if (!r->res_recover_locks_count) {
rsb_clear_flag(r, RSB_NEW_MASTER);
recover_list_del(r);
}
if (recover_list_empty(r->res_ls))
wake_up(&r->res_ls->ls_wait_general);
}
/*
* The lvb needs to be recovered on all master rsb's. This includes setting
* the VALNOTVALID flag if necessary, and determining the correct lvb contents
* based on the lvb's of the locks held on the rsb.
*
* RSB_VALNOTVALID is set in two cases:
*
* 1. we are master, but not new, and we purged an EX/PW lock held by a
* failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL)
*
* 2. we are a new master, and there are only NL/CR locks left.
* (We could probably improve this by only invaliding in this way when
* the previous master left uncleanly. VMS docs mention that.)
*
* The LVB contents are only considered for changing when this is a new master
* of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with
* mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken
* from the lkb with the largest lvb sequence number.
*/
static void recover_lvb(struct dlm_rsb *r)
{
struct dlm_lkb *lkb, *high_lkb = NULL;
uint32_t high_seq = 0;
int lock_lvb_exists = 0;
int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
if (!rsb_flag(r, RSB_NEW_MASTER2) &&
rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
/* case 1 above */
rsb_set_flag(r, RSB_VALNOTVALID);
return;
}
if (!rsb_flag(r, RSB_NEW_MASTER2))
return;
/* we are the new master, so figure out if VALNOTVALID should
be set, and set the rsb lvb from the best lkb available. */
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
if (lkb->lkb_grmode > DLM_LOCK_CR) {
big_lock_exists = 1;
goto setflag;
}
if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
high_lkb = lkb;
high_seq = lkb->lkb_lvbseq;
}
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
if (lkb->lkb_grmode > DLM_LOCK_CR) {
big_lock_exists = 1;
goto setflag;
}
if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
high_lkb = lkb;
high_seq = lkb->lkb_lvbseq;
}
}
setflag:
if (!lock_lvb_exists)
goto out;
/* lvb is invalidated if only NL/CR locks remain */
if (!big_lock_exists)
rsb_set_flag(r, RSB_VALNOTVALID);
if (!r->res_lvbptr) {
r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
goto out;
}
if (big_lock_exists) {
r->res_lvbseq = lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
} else if (high_lkb) {
r->res_lvbseq = high_lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
} else {
r->res_lvbseq = 0;
memset(r->res_lvbptr, 0, lvblen);
}
out:
return;
}
/* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks
converting PR->CW or CW->PR need to have their lkb_grmode set. */
static void recover_conversion(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
struct dlm_lkb *lkb;
int grmode = -1;
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (lkb->lkb_grmode == DLM_LOCK_PR ||
lkb->lkb_grmode == DLM_LOCK_CW) {
grmode = lkb->lkb_grmode;
break;
}
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
if (lkb->lkb_grmode != DLM_LOCK_IV)
continue;
if (grmode == -1) {
log_debug(ls, "recover_conversion %x set gr to rq %d",
lkb->lkb_id, lkb->lkb_rqmode);
lkb->lkb_grmode = lkb->lkb_rqmode;
} else {
log_debug(ls, "recover_conversion %x set gr %d",
lkb->lkb_id, grmode);
lkb->lkb_grmode = grmode;
}
}
}
/* We've become the new master for this rsb and waiting/converting locks may
need to be granted in dlm_recover_grant() due to locks that may have
existed from a removed node. */
static void recover_grant(struct dlm_rsb *r)
{
if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
rsb_set_flag(r, RSB_RECOVER_GRANT);
}
void dlm_recover_rsbs(struct dlm_ls *ls)
{
struct dlm_rsb *r;
unsigned int count = 0;
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
lock_rsb(r);
if (is_master(r)) {
if (rsb_flag(r, RSB_RECOVER_CONVERT))
recover_conversion(r);
/* recover lvb before granting locks so the updated
lvb/VALNOTVALID is presented in the completion */
recover_lvb(r);
if (rsb_flag(r, RSB_NEW_MASTER2))
recover_grant(r);
count++;
} else {
rsb_clear_flag(r, RSB_VALNOTVALID);
}
rsb_clear_flag(r, RSB_RECOVER_CONVERT);
rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
rsb_clear_flag(r, RSB_NEW_MASTER2);
unlock_rsb(r);
}
up_read(&ls->ls_root_sem);
if (count)
log_rinfo(ls, "dlm_recover_rsbs %d done", count);
}
/* Create a single list of all root rsb's to be used during recovery */
int dlm_create_root_list(struct dlm_ls *ls)
{
struct rb_node *n;
struct dlm_rsb *r;
int i, error = 0;
down_write(&ls->ls_root_sem);
if (!list_empty(&ls->ls_root_list)) {
log_error(ls, "root list not empty");
error = -EINVAL;
goto out;
}
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
spin_lock(&ls->ls_rsbtbl[i].lock);
for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
r = rb_entry(n, struct dlm_rsb, res_hashnode);
list_add(&r->res_root_list, &ls->ls_root_list);
dlm_hold_rsb(r);
}
if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss))
log_error(ls, "dlm_create_root_list toss not empty");
spin_unlock(&ls->ls_rsbtbl[i].lock);
}
out:
up_write(&ls->ls_root_sem);
return error;
}
void dlm_release_root_list(struct dlm_ls *ls)
{
struct dlm_rsb *r, *safe;
down_write(&ls->ls_root_sem);
list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
list_del_init(&r->res_root_list);
dlm_put_rsb(r);
}
up_write(&ls->ls_root_sem);
}
void dlm_clear_toss(struct dlm_ls *ls)
{
struct rb_node *n, *next;
struct dlm_rsb *r;
unsigned int count = 0;
int i;
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
spin_lock(&ls->ls_rsbtbl[i].lock);
for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) {
next = rb_next(n);
r = rb_entry(n, struct dlm_rsb, res_hashnode);
rb_erase(n, &ls->ls_rsbtbl[i].toss);
dlm_free_rsb(r);
count++;
}
spin_unlock(&ls->ls_rsbtbl[i].lock);
}
if (count)
log_rinfo(ls, "dlm_clear_toss %u done", count);
}

34
fs/dlm/recover.h Normal file
View file

@ -0,0 +1,34 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RECOVER_DOT_H__
#define __RECOVER_DOT_H__
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls));
uint32_t dlm_recover_status(struct dlm_ls *ls);
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status);
int dlm_recover_members_wait(struct dlm_ls *ls);
int dlm_recover_directory_wait(struct dlm_ls *ls);
int dlm_recover_locks_wait(struct dlm_ls *ls);
int dlm_recover_done_wait(struct dlm_ls *ls);
int dlm_recover_masters(struct dlm_ls *ls);
int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_locks(struct dlm_ls *ls);
void dlm_recovered_lock(struct dlm_rsb *r);
int dlm_create_root_list(struct dlm_ls *ls);
void dlm_release_root_list(struct dlm_ls *ls);
void dlm_clear_toss(struct dlm_ls *ls);
void dlm_recover_rsbs(struct dlm_ls *ls);
#endif /* __RECOVER_DOT_H__ */

342
fs/dlm/recoverd.c Normal file
View file

@ -0,0 +1,342 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"
/* If the start for which we're re-enabling locking (seq) has been superseded
by a newer stop (ls_recover_seq), we need to leave locking disabled.
We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
enables locking and clears the requestqueue between a and b. */
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
int error = -EINTR;
down_write(&ls->ls_recv_active);
spin_lock(&ls->ls_recover_lock);
if (ls->ls_recover_seq == seq) {
set_bit(LSFL_RUNNING, &ls->ls_flags);
/* unblocks processes waiting to enter the dlm */
up_write(&ls->ls_in_recovery);
clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
error = 0;
}
spin_unlock(&ls->ls_recover_lock);
up_write(&ls->ls_recv_active);
return error;
}
static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
unsigned long start;
int error, neg = 0;
log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);
mutex_lock(&ls->ls_recoverd_active);
dlm_callback_suspend(ls);
dlm_clear_toss(ls);
/*
* This list of root rsb's will be the basis of most of the recovery
* routines.
*/
dlm_create_root_list(ls);
/*
* Add or remove nodes from the lockspace's ls_nodes list.
*/
error = dlm_recover_members(ls, rv, &neg);
if (error) {
log_rinfo(ls, "dlm_recover_members error %d", error);
goto fail;
}
dlm_recover_dir_nodeid(ls);
ls->ls_recover_dir_sent_res = 0;
ls->ls_recover_dir_sent_msg = 0;
ls->ls_recover_locks_in = 0;
dlm_set_recover_status(ls, DLM_RS_NODES);
error = dlm_recover_members_wait(ls);
if (error) {
log_rinfo(ls, "dlm_recover_members_wait error %d", error);
goto fail;
}
start = jiffies;
/*
* Rebuild our own share of the directory by collecting from all other
* nodes their master rsb names that hash to us.
*/
error = dlm_recover_directory(ls);
if (error) {
log_rinfo(ls, "dlm_recover_directory error %d", error);
goto fail;
}
dlm_set_recover_status(ls, DLM_RS_DIR);
error = dlm_recover_directory_wait(ls);
if (error) {
log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
goto fail;
}
log_rinfo(ls, "dlm_recover_directory %u out %u messages",
ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);
/*
* We may have outstanding operations that are waiting for a reply from
* a failed node. Mark these to be resent after recovery. Unlock and
* cancel ops can just be completed.
*/
dlm_recover_waiters_pre(ls);
error = dlm_recovery_stopped(ls);
if (error)
goto fail;
if (neg || dlm_no_directory(ls)) {
/*
* Clear lkb's for departed nodes.
*/
dlm_recover_purge(ls);
/*
* Get new master nodeid's for rsb's that were mastered on
* departed nodes.
*/
error = dlm_recover_masters(ls);
if (error) {
log_rinfo(ls, "dlm_recover_masters error %d", error);
goto fail;
}
/*
* Send our locks on remastered rsb's to the new masters.
*/
error = dlm_recover_locks(ls);
if (error) {
log_rinfo(ls, "dlm_recover_locks error %d", error);
goto fail;
}
dlm_set_recover_status(ls, DLM_RS_LOCKS);
error = dlm_recover_locks_wait(ls);
if (error) {
log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
goto fail;
}
log_rinfo(ls, "dlm_recover_locks %u in",
ls->ls_recover_locks_in);
/*
* Finalize state in master rsb's now that all locks can be
* checked. This includes conversion resolution and lvb
* settings.
*/
dlm_recover_rsbs(ls);
} else {
/*
* Other lockspace members may be going through the "neg" steps
* while also adding us to the lockspace, in which case they'll
* be doing the recover_locks (RS_LOCKS) barrier.
*/
dlm_set_recover_status(ls, DLM_RS_LOCKS);
error = dlm_recover_locks_wait(ls);
if (error) {
log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
goto fail;
}
}
dlm_release_root_list(ls);
/*
* Purge directory-related requests that are saved in requestqueue.
* All dir requests from before recovery are invalid now due to the dir
* rebuild and will be resent by the requesting nodes.
*/
dlm_purge_requestqueue(ls);
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
log_rinfo(ls, "dlm_recover_done_wait error %d", error);
goto fail;
}
dlm_clear_members_gone(ls);
dlm_adjust_timeouts(ls);
dlm_callback_resume(ls);
error = enable_locking(ls, rv->seq);
if (error) {
log_rinfo(ls, "enable_locking error %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
log_rinfo(ls, "dlm_process_requestqueue error %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
goto fail;
}
dlm_recover_grant(ls);
log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
(unsigned long long)rv->seq, ls->ls_generation,
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
log_rinfo(ls, "dlm_recover %llu error %d",
(unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
return error;
}
/* The dlm_ls_start() that created the rv we take here may already have been
stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
flag set. */
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
ls->ls_recover_args = NULL;
if (rv && ls->ls_recover_seq == rv->seq)
clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
spin_unlock(&ls->ls_recover_lock);
if (rv) {
ls_recover(ls, rv);
kfree(rv->nodes);
kfree(rv);
}
}
static int dlm_recoverd(void *arg)
{
struct dlm_ls *ls;
ls = dlm_find_lockspace_local(arg);
if (!ls) {
log_print("dlm_recoverd: no lockspace %p", arg);
return -1;
}
down_write(&ls->ls_in_recovery);
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
wake_up(&ls->ls_recover_lock_wait);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
!test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
schedule();
set_current_state(TASK_RUNNING);
if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
down_write(&ls->ls_in_recovery);
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
wake_up(&ls->ls_recover_lock_wait);
}
if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
do_ls_recovery(ls);
}
if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
up_write(&ls->ls_in_recovery);
dlm_put_lockspace(ls);
return 0;
}
int dlm_recoverd_start(struct dlm_ls *ls)
{
struct task_struct *p;
int error = 0;
p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
if (IS_ERR(p))
error = PTR_ERR(p);
else
ls->ls_recoverd_task = p;
return error;
}
void dlm_recoverd_stop(struct dlm_ls *ls)
{
kthread_stop(ls->ls_recoverd_task);
}
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
wake_up(&ls->ls_wait_general);
mutex_lock(&ls->ls_recoverd_active);
}
void dlm_recoverd_resume(struct dlm_ls *ls)
{
mutex_unlock(&ls->ls_recoverd_active);
}

23
fs/dlm/recoverd.h Normal file
View file

@ -0,0 +1,23 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RECOVERD_DOT_H__
#define __RECOVERD_DOT_H__
void dlm_recoverd_stop(struct dlm_ls *ls);
int dlm_recoverd_start(struct dlm_ls *ls);
void dlm_recoverd_suspend(struct dlm_ls *ls);
void dlm_recoverd_resume(struct dlm_ls *ls);
#endif /* __RECOVERD_DOT_H__ */

171
fs/dlm/requestqueue.c Normal file
View file

@ -0,0 +1,171 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "member.h"
#include "lock.h"
#include "dir.h"
#include "config.h"
#include "requestqueue.h"
struct rq_entry {
struct list_head list;
uint32_t recover_seq;
int nodeid;
struct dlm_message request;
};
/*
* Requests received while the lockspace is in recovery get added to the
* request queue and processed when recovery is complete. This happens when
* the lockspace is suspended on some nodes before it is on others, or the
* lockspace is enabled on some while still suspended on others.
*/
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
{
struct rq_entry *e;
int length = ms->m_header.h_length - sizeof(struct dlm_message);
e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
if (!e) {
log_print("dlm_add_requestqueue: out of memory len %d", length);
return;
}
e->recover_seq = ls->ls_recover_seq & 0xFFFFFFFF;
e->nodeid = nodeid;
memcpy(&e->request, ms, ms->m_header.h_length);
mutex_lock(&ls->ls_requestqueue_mutex);
list_add_tail(&e->list, &ls->ls_requestqueue);
mutex_unlock(&ls->ls_requestqueue_mutex);
}
/*
* Called by dlm_recoverd to process normal messages saved while recovery was
* happening. Normal locking has been enabled before this is called. dlm_recv
* upon receiving a message, will wait for all saved messages to be drained
* here before processing the message it got. If a new dlm_ls_stop() arrives
* while we're processing these saved messages, it may block trying to suspend
* dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that
* case, we don't abort since locking_stopped is still 0. If dlm_recv is not
* waiting for us, then this processing may be aborted due to locking_stopped.
*/
int dlm_process_requestqueue(struct dlm_ls *ls)
{
struct rq_entry *e;
struct dlm_message *ms;
int error = 0;
mutex_lock(&ls->ls_requestqueue_mutex);
for (;;) {
if (list_empty(&ls->ls_requestqueue)) {
mutex_unlock(&ls->ls_requestqueue_mutex);
error = 0;
break;
}
e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
mutex_unlock(&ls->ls_requestqueue_mutex);
ms = &e->request;
log_limit(ls, "dlm_process_requestqueue msg %d from %d "
"lkid %x remid %x result %d seq %u",
ms->m_type, ms->m_header.h_nodeid,
ms->m_lkid, ms->m_remid, ms->m_result,
e->recover_seq);
dlm_receive_message_saved(ls, &e->request, e->recover_seq);
mutex_lock(&ls->ls_requestqueue_mutex);
list_del(&e->list);
kfree(e);
if (dlm_locking_stopped(ls)) {
log_debug(ls, "process_requestqueue abort running");
mutex_unlock(&ls->ls_requestqueue_mutex);
error = -EINTR;
break;
}
schedule();
}
return error;
}
/*
* After recovery is done, locking is resumed and dlm_recoverd takes all the
* saved requests and processes them as they would have been by dlm_recv. At
* the same time, dlm_recv will start receiving new requests from remote nodes.
* We want to delay dlm_recv processing new requests until dlm_recoverd has
* finished processing the old saved requests. We don't check for locking
* stopped here because dlm_ls_stop won't stop locking until it's suspended us
* (dlm_recv).
*/
void dlm_wait_requestqueue(struct dlm_ls *ls)
{
for (;;) {
mutex_lock(&ls->ls_requestqueue_mutex);
if (list_empty(&ls->ls_requestqueue))
break;
mutex_unlock(&ls->ls_requestqueue_mutex);
schedule();
}
mutex_unlock(&ls->ls_requestqueue_mutex);
}
static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
{
uint32_t type = ms->m_type;
/* the ls is being cleaned up and freed by release_lockspace */
if (!ls->ls_count)
return 1;
if (dlm_is_removed(ls, nodeid))
return 1;
/* directory operations are always purged because the directory is
always rebuilt during recovery and the lookups resent */
if (type == DLM_MSG_REMOVE ||
type == DLM_MSG_LOOKUP ||
type == DLM_MSG_LOOKUP_REPLY)
return 1;
if (!dlm_no_directory(ls))
return 0;
return 1;
}
void dlm_purge_requestqueue(struct dlm_ls *ls)
{
struct dlm_message *ms;
struct rq_entry *e, *safe;
mutex_lock(&ls->ls_requestqueue_mutex);
list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) {
ms = &e->request;
if (purge_request(ls, ms, e->nodeid)) {
list_del(&e->list);
kfree(e);
}
}
mutex_unlock(&ls->ls_requestqueue_mutex);
}

22
fs/dlm/requestqueue.h Normal file
View file

@ -0,0 +1,22 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms);
int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls);
#endif

1006
fs/dlm/user.c Normal file

File diff suppressed because it is too large Load diff

19
fs/dlm/user.h Normal file
View file

@ -0,0 +1,19 @@
/*
* Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*/
#ifndef __USER_DOT_H__
#define __USER_DOT_H__
void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq);
int dlm_user_init(void);
void dlm_user_exit(void);
int dlm_device_deregister(struct dlm_ls *ls);
int dlm_user_daemon_available(void);
#endif

154
fs/dlm/util.c Normal file
View file

@ -0,0 +1,154 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "rcom.h"
#include "util.h"
#define DLM_ERRNO_EDEADLK 35
#define DLM_ERRNO_EBADR 53
#define DLM_ERRNO_EBADSLT 57
#define DLM_ERRNO_EPROTO 71
#define DLM_ERRNO_EOPNOTSUPP 95
#define DLM_ERRNO_ETIMEDOUT 110
#define DLM_ERRNO_EINPROGRESS 115
static void header_out(struct dlm_header *hd)
{
hd->h_version = cpu_to_le32(hd->h_version);
hd->h_lockspace = cpu_to_le32(hd->h_lockspace);
hd->h_nodeid = cpu_to_le32(hd->h_nodeid);
hd->h_length = cpu_to_le16(hd->h_length);
}
static void header_in(struct dlm_header *hd)
{
hd->h_version = le32_to_cpu(hd->h_version);
hd->h_lockspace = le32_to_cpu(hd->h_lockspace);
hd->h_nodeid = le32_to_cpu(hd->h_nodeid);
hd->h_length = le16_to_cpu(hd->h_length);
}
/* higher errno values are inconsistent across architectures, so select
one set of values for on the wire */
static int to_dlm_errno(int err)
{
switch (err) {
case -EDEADLK:
return -DLM_ERRNO_EDEADLK;
case -EBADR:
return -DLM_ERRNO_EBADR;
case -EBADSLT:
return -DLM_ERRNO_EBADSLT;
case -EPROTO:
return -DLM_ERRNO_EPROTO;
case -EOPNOTSUPP:
return -DLM_ERRNO_EOPNOTSUPP;
case -ETIMEDOUT:
return -DLM_ERRNO_ETIMEDOUT;
case -EINPROGRESS:
return -DLM_ERRNO_EINPROGRESS;
}
return err;
}
static int from_dlm_errno(int err)
{
switch (err) {
case -DLM_ERRNO_EDEADLK:
return -EDEADLK;
case -DLM_ERRNO_EBADR:
return -EBADR;
case -DLM_ERRNO_EBADSLT:
return -EBADSLT;
case -DLM_ERRNO_EPROTO:
return -EPROTO;
case -DLM_ERRNO_EOPNOTSUPP:
return -EOPNOTSUPP;
case -DLM_ERRNO_ETIMEDOUT:
return -ETIMEDOUT;
case -DLM_ERRNO_EINPROGRESS:
return -EINPROGRESS;
}
return err;
}
void dlm_message_out(struct dlm_message *ms)
{
header_out(&ms->m_header);
ms->m_type = cpu_to_le32(ms->m_type);
ms->m_nodeid = cpu_to_le32(ms->m_nodeid);
ms->m_pid = cpu_to_le32(ms->m_pid);
ms->m_lkid = cpu_to_le32(ms->m_lkid);
ms->m_remid = cpu_to_le32(ms->m_remid);
ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid);
ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid);
ms->m_exflags = cpu_to_le32(ms->m_exflags);
ms->m_sbflags = cpu_to_le32(ms->m_sbflags);
ms->m_flags = cpu_to_le32(ms->m_flags);
ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq);
ms->m_hash = cpu_to_le32(ms->m_hash);
ms->m_status = cpu_to_le32(ms->m_status);
ms->m_grmode = cpu_to_le32(ms->m_grmode);
ms->m_rqmode = cpu_to_le32(ms->m_rqmode);
ms->m_bastmode = cpu_to_le32(ms->m_bastmode);
ms->m_asts = cpu_to_le32(ms->m_asts);
ms->m_result = cpu_to_le32(to_dlm_errno(ms->m_result));
}
void dlm_message_in(struct dlm_message *ms)
{
header_in(&ms->m_header);
ms->m_type = le32_to_cpu(ms->m_type);
ms->m_nodeid = le32_to_cpu(ms->m_nodeid);
ms->m_pid = le32_to_cpu(ms->m_pid);
ms->m_lkid = le32_to_cpu(ms->m_lkid);
ms->m_remid = le32_to_cpu(ms->m_remid);
ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid);
ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid);
ms->m_exflags = le32_to_cpu(ms->m_exflags);
ms->m_sbflags = le32_to_cpu(ms->m_sbflags);
ms->m_flags = le32_to_cpu(ms->m_flags);
ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq);
ms->m_hash = le32_to_cpu(ms->m_hash);
ms->m_status = le32_to_cpu(ms->m_status);
ms->m_grmode = le32_to_cpu(ms->m_grmode);
ms->m_rqmode = le32_to_cpu(ms->m_rqmode);
ms->m_bastmode = le32_to_cpu(ms->m_bastmode);
ms->m_asts = le32_to_cpu(ms->m_asts);
ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result));
}
void dlm_rcom_out(struct dlm_rcom *rc)
{
header_out(&rc->rc_header);
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
rc->rc_seq = cpu_to_le64(rc->rc_seq);
rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
}
void dlm_rcom_in(struct dlm_rcom *rc)
{
header_in(&rc->rc_header);
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
rc->rc_seq = le64_to_cpu(rc->rc_seq);
rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
}

22
fs/dlm/util.h Normal file
View file

@ -0,0 +1,22 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__
void dlm_message_out(struct dlm_message *ms);
void dlm_message_in(struct dlm_message *ms);
void dlm_rcom_out(struct dlm_rcom *rc);
void dlm_rcom_in(struct dlm_rcom *rc);
#endif