mirror of
https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-09-09 17:02:46 -04:00
Fixed MTP to work with TWRP
This commit is contained in:
commit
f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
73
drivers/block/drbd/Kconfig
Normal file
73
drivers/block/drbd/Kconfig
Normal file
|
@ -0,0 +1,73 @@
|
|||
#
# DRBD device driver configuration
#

comment "DRBD disabled because PROC_FS or INET not selected"
	depends on PROC_FS='n' || INET='n'

config BLK_DEV_DRBD
	tristate "DRBD Distributed Replicated Block Device support"
	depends on PROC_FS && INET
	select LRU_CACHE
	select LIBCRC32C
	default n
	help

	  NOTE: In order to authenticate connections you have to select
	  CRYPTO_HMAC and a hash function as well.

	  DRBD is a shared-nothing, synchronously replicated block device. It
	  is designed to serve as a building block for high availability
	  clusters and in this context, is a "drop-in" replacement for shared
	  storage. Simplistically, you could see it as a network RAID 1.

	  Each minor device has a role, which can be 'primary' or 'secondary'.
	  On the node with the primary device the application is supposed to
	  run and to access the device (/dev/drbdX). Every write is sent to
	  the local 'lower level block device' and, across the network, to the
	  node with the device in 'secondary' state.  The secondary device
	  simply writes the data to its lower level block device.

	  DRBD can also be used in dual-Primary mode (device writable on both
	  nodes), which means it can exhibit shared disk semantics in a
	  shared-nothing cluster.  Needless to say, on top of dual-Primary
	  DRBD utilizing a cluster file system is necessary to maintain for
	  cache coherency.

	  For automatic failover you need a cluster manager (e.g. heartbeat).
	  See also: http://www.drbd.org/, http://www.linux-ha.org

	  If unsure, say N.

config DRBD_FAULT_INJECTION
	bool "DRBD fault injection"
	depends on BLK_DEV_DRBD
	help

	  Say Y here if you want to simulate IO errors, in order to test DRBD's
	  behavior.

	  The actual simulation of IO errors is done by writing 3 values to
	  /sys/module/drbd/parameters/

	  enable_faults: bitmask of...
	  1	meta data write
	  2               read
	  4	resync data write
	  8	            read
	  16	data write
	  32	data read
	  64	read ahead
	  128	kmalloc of bitmap
	  256	allocation of peer_requests
	  512	insert data corruption on receiving side

	  fault_devs: bitmask of minor numbers
	  fault_rate: frequency in percent

	  Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
		echo 16 > /sys/module/drbd/parameters/enable_faults
		echo 1 > /sys/module/drbd/parameters/fault_devs
		echo 5 > /sys/module/drbd/parameters/fault_rate

	  If unsure, say N.
|
8
drivers/block/drbd/Makefile
Normal file
8
drivers/block/drbd/Makefile
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Objects that make up the drbd.ko module.
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drbd-y += drbd_nla.o
# debugfs support is only compiled in when CONFIG_DEBUG_FS is enabled.
drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o

obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
|
1220
drivers/block/drbd/drbd_actlog.c
Normal file
1220
drivers/block/drbd/drbd_actlog.c
Normal file
File diff suppressed because it is too large
Load diff
1648
drivers/block/drbd/drbd_bitmap.c
Normal file
1648
drivers/block/drbd/drbd_bitmap.c
Normal file
File diff suppressed because it is too large
Load diff
958
drivers/block/drbd/drbd_debugfs.c
Normal file
958
drivers/block/drbd/drbd_debugfs.c
Normal file
|
@ -0,0 +1,958 @@
|
|||
#define pr_fmt(fmt) "drbd debugfs: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/jiffies.h>
#include <linux/list.h>

#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_debugfs.h"


/**********************************************************************
 * Whenever you change the file format, remember to bump the version. *
 **********************************************************************/

/* Fixed debugfs entries created at module load; children hang below these. */
static struct dentry *drbd_debugfs_root;
static struct dentry *drbd_debugfs_version;
static struct dentry *drbd_debugfs_resources;
static struct dentry *drbd_debugfs_minors;
|
||||
|
||||
/*
 * Print "\t<age in ms>" if @valid, otherwise a "\t-" placeholder.
 * @dt is a jiffies delta (now - timestamp of the event).
 *
 * Fix: jiffies_to_msecs() returns unsigned int, so the format must be
 * "%u", not "%d" (signed/unsigned format-specifier mismatch, -Wformat).
 */
static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
{
	if (valid)
		seq_printf(m, "\t%u", jiffies_to_msecs(dt));
	else
		seq_printf(m, "\t-");
}
|
||||
|
||||
static void __seq_print_rq_state_bit(struct seq_file *m,
|
||||
bool is_set, char *sep, const char *set_name, const char *unset_name)
|
||||
{
|
||||
if (is_set && set_name) {
|
||||
seq_putc(m, *sep);
|
||||
seq_puts(m, set_name);
|
||||
*sep = '|';
|
||||
} else if (!is_set && unset_name) {
|
||||
seq_putc(m, *sep);
|
||||
seq_puts(m, unset_name);
|
||||
*sep = '|';
|
||||
}
|
||||
}
|
||||
|
||||
static void seq_print_rq_state_bit(struct seq_file *m,
|
||||
bool is_set, char *sep, const char *set_name)
|
||||
{
|
||||
__seq_print_rq_state_bit(m, is_set, sep, set_name, NULL);
|
||||
}
|
||||
|
||||
/* pretty print enum drbd_req_state_bits req->rq_state */
|
||||
static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
|
||||
{
|
||||
unsigned int s = req->rq_state;
|
||||
char sep = ' ';
|
||||
seq_printf(m, "\t0x%08x", s);
|
||||
seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
|
||||
|
||||
/* RQ_WRITE ignored, already reported */
|
||||
seq_puts(m, "\tlocal:");
|
||||
seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
|
||||
seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
|
||||
seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
|
||||
sep = ' ';
|
||||
seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
|
||||
seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
|
||||
seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
|
||||
seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
|
||||
if (sep == ' ')
|
||||
seq_puts(m, " -");
|
||||
|
||||
/* for_each_connection ... */
|
||||
seq_printf(m, "\tnet:");
|
||||
sep = ' ';
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
|
||||
seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
|
||||
if (sep == ' ')
|
||||
seq_puts(m, " -");
|
||||
|
||||
seq_printf(m, " :");
|
||||
sep = ' ';
|
||||
seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
|
||||
seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
|
||||
seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
|
||||
if (sep == ' ')
|
||||
seq_puts(m, " -");
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
|
||||
static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
|
||||
{
|
||||
/* change anything here, fixup header below! */
|
||||
unsigned int s = req->rq_state;
|
||||
|
||||
#define RQ_HDR_1 "epoch\tsector\tsize\trw"
|
||||
seq_printf(m, "0x%x\t%llu\t%u\t%s",
|
||||
req->epoch,
|
||||
(unsigned long long)req->i.sector, req->i.size >> 9,
|
||||
(s & RQ_WRITE) ? "W" : "R");
|
||||
|
||||
#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
|
||||
seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
|
||||
seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
|
||||
seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
|
||||
|
||||
#define RQ_HDR_3 "\tsent\tacked\tdone"
|
||||
seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
|
||||
seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
|
||||
seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
|
||||
|
||||
#define RQ_HDR_4 "\tstate\n"
|
||||
seq_print_request_state(m, req);
|
||||
}
|
||||
#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
|
||||
|
||||
static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
|
||||
{
|
||||
seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
|
||||
seq_print_one_request(m, req, now);
|
||||
}
|
||||
|
||||
static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
|
||||
{
|
||||
struct drbd_device *device;
|
||||
unsigned int i;
|
||||
|
||||
seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n");
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, i) {
|
||||
struct drbd_md_io tmp;
|
||||
/* In theory this is racy,
|
||||
* in the sense that there could have been a
|
||||
* drbd_md_put_buffer(); drbd_md_get_buffer();
|
||||
* between accessing these members here. */
|
||||
tmp = device->md_io;
|
||||
if (atomic_read(&tmp.in_use)) {
|
||||
seq_printf(m, "%u\t%u\t%d\t",
|
||||
device->minor, device->vnr,
|
||||
jiffies_to_msecs(now - tmp.start_jif));
|
||||
if (time_before(tmp.submit_jif, tmp.start_jif))
|
||||
seq_puts(m, "-\t");
|
||||
else
|
||||
seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif));
|
||||
seq_printf(m, "%s\n", tmp.current_use);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
|
||||
{
|
||||
struct drbd_device *device;
|
||||
unsigned int i;
|
||||
|
||||
seq_puts(m, "minor\tvnr\tage\t#waiting\n");
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, i) {
|
||||
unsigned long jif;
|
||||
struct drbd_request *req;
|
||||
int n = atomic_read(&device->ap_actlog_cnt);
|
||||
if (n) {
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
req = list_first_entry_or_null(&device->pending_master_completion[1],
|
||||
struct drbd_request, req_pending_master_completion);
|
||||
/* if the oldest request does not wait for the activity log
|
||||
* it is not interesting for us here */
|
||||
if (req && !(req->rq_state & RQ_IN_ACT_LOG))
|
||||
jif = req->start_jif;
|
||||
else
|
||||
req = NULL;
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
}
|
||||
if (n) {
|
||||
seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
|
||||
if (req)
|
||||
seq_printf(m, "%u\t", jiffies_to_msecs(now - jif));
|
||||
else
|
||||
seq_puts(m, "-\t");
|
||||
seq_printf(m, "%u\n", n);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now)
|
||||
{
|
||||
struct drbd_bm_aio_ctx *ctx;
|
||||
unsigned long start_jif;
|
||||
unsigned int in_flight;
|
||||
unsigned int flags;
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list);
|
||||
if (ctx && ctx->done)
|
||||
ctx = NULL;
|
||||
if (ctx) {
|
||||
start_jif = ctx->start_jif;
|
||||
in_flight = atomic_read(&ctx->in_flight);
|
||||
flags = ctx->flags;
|
||||
}
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
if (ctx) {
|
||||
seq_printf(m, "%u\t%u\t%c\t%u\t%u\n",
|
||||
device->minor, device->vnr,
|
||||
(flags & BM_AIO_READ) ? 'R' : 'W',
|
||||
jiffies_to_msecs(now - start_jif),
|
||||
in_flight);
|
||||
}
|
||||
}
|
||||
|
||||
static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
|
||||
{
|
||||
struct drbd_device *device;
|
||||
unsigned int i;
|
||||
|
||||
seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n");
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, i) {
|
||||
seq_print_device_bitmap_io(m, device, now);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/* pretty print enum peer_req->flags */
|
||||
static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req)
|
||||
{
|
||||
unsigned long f = peer_req->flags;
|
||||
char sep = ' ';
|
||||
|
||||
__seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing");
|
||||
__seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal");
|
||||
seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
|
||||
seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
|
||||
seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
|
||||
|
||||
if (f & EE_IS_TRIM) {
|
||||
seq_putc(m, sep);
|
||||
sep = '|';
|
||||
if (f & EE_IS_TRIM_USE_ZEROOUT)
|
||||
seq_puts(m, "zero-out");
|
||||
else
|
||||
seq_puts(m, "trim");
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
static void seq_print_peer_request(struct seq_file *m,
|
||||
struct drbd_device *device, struct list_head *lh,
|
||||
unsigned long now)
|
||||
{
|
||||
bool reported_preparing = false;
|
||||
struct drbd_peer_request *peer_req;
|
||||
list_for_each_entry(peer_req, lh, w.list) {
|
||||
if (reported_preparing && !(peer_req->flags & EE_SUBMITTED))
|
||||
continue;
|
||||
|
||||
if (device)
|
||||
seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
|
||||
|
||||
seq_printf(m, "%llu\t%u\t%c\t%u\t",
|
||||
(unsigned long long)peer_req->i.sector, peer_req->i.size >> 9,
|
||||
(peer_req->flags & EE_WRITE) ? 'W' : 'R',
|
||||
jiffies_to_msecs(now - peer_req->submit_jif));
|
||||
seq_print_peer_request_flags(m, peer_req);
|
||||
if (peer_req->flags & EE_SUBMITTED)
|
||||
break;
|
||||
else
|
||||
reported_preparing = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void seq_print_device_peer_requests(struct seq_file *m,
|
||||
struct drbd_device *device, unsigned long now)
|
||||
{
|
||||
seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n");
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
seq_print_peer_request(m, device, &device->active_ee, now);
|
||||
seq_print_peer_request(m, device, &device->read_ee, now);
|
||||
seq_print_peer_request(m, device, &device->sync_ee, now);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
if (test_bit(FLUSH_PENDING, &device->flags)) {
|
||||
seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n",
|
||||
device->minor, device->vnr,
|
||||
jiffies_to_msecs(now - device->flush_jif));
|
||||
}
|
||||
}
|
||||
|
||||
static void seq_print_resource_pending_peer_requests(struct seq_file *m,
|
||||
struct drbd_resource *resource, unsigned long now)
|
||||
{
|
||||
struct drbd_device *device;
|
||||
unsigned int i;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, i) {
|
||||
seq_print_device_peer_requests(m, device, now);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void seq_print_resource_transfer_log_summary(struct seq_file *m,
|
||||
struct drbd_resource *resource,
|
||||
struct drbd_connection *connection,
|
||||
unsigned long now)
|
||||
{
|
||||
struct drbd_request *req;
|
||||
unsigned int count = 0;
|
||||
unsigned int show_state = 0;
|
||||
|
||||
seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
|
||||
spin_lock_irq(&resource->req_lock);
|
||||
list_for_each_entry(req, &connection->transfer_log, tl_requests) {
|
||||
unsigned int tmp = 0;
|
||||
unsigned int s;
|
||||
++count;
|
||||
|
||||
/* don't disable irq "forever" */
|
||||
if (!(count & 0x1ff)) {
|
||||
struct drbd_request *req_next;
|
||||
kref_get(&req->kref);
|
||||
spin_unlock_irq(&resource->req_lock);
|
||||
cond_resched();
|
||||
spin_lock_irq(&resource->req_lock);
|
||||
req_next = list_next_entry(req, tl_requests);
|
||||
if (kref_put(&req->kref, drbd_req_destroy))
|
||||
req = req_next;
|
||||
if (&req->tl_requests == &connection->transfer_log)
|
||||
break;
|
||||
}
|
||||
|
||||
s = req->rq_state;
|
||||
|
||||
/* This is meant to summarize timing issues, to be able to tell
|
||||
* local disk problems from network problems.
|
||||
* Skip requests, if we have shown an even older request with
|
||||
* similar aspects already. */
|
||||
if (req->master_bio == NULL)
|
||||
tmp |= 1;
|
||||
if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
|
||||
tmp |= 2;
|
||||
if (s & RQ_NET_MASK) {
|
||||
if (!(s & RQ_NET_SENT))
|
||||
tmp |= 4;
|
||||
if (s & RQ_NET_PENDING)
|
||||
tmp |= 8;
|
||||
if (!(s & RQ_NET_DONE))
|
||||
tmp |= 16;
|
||||
}
|
||||
if ((tmp & show_state) == tmp)
|
||||
continue;
|
||||
show_state |= tmp;
|
||||
seq_printf(m, "%u\t", count);
|
||||
seq_print_minor_vnr_req(m, req, now);
|
||||
if (show_state == 0x1f)
|
||||
break;
|
||||
}
|
||||
spin_unlock_irq(&resource->req_lock);
|
||||
}
|
||||
|
||||
/* TODO: transfer_log and friends should be moved to resource */
|
||||
static int in_flight_summary_show(struct seq_file *m, void *pos)
|
||||
{
|
||||
struct drbd_resource *resource = m->private;
|
||||
struct drbd_connection *connection;
|
||||
unsigned long jif = jiffies;
|
||||
|
||||
connection = first_connection(resource);
|
||||
/* This does not happen, actually.
|
||||
* But be robust and prepare for future code changes. */
|
||||
if (!connection || !kref_get_unless_zero(&connection->kref))
|
||||
return -ESTALE;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
seq_puts(m, "oldest bitmap IO\n");
|
||||
seq_print_resource_pending_bitmap_io(m, resource, jif);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "meta data IO\n");
|
||||
seq_print_resource_pending_meta_io(m, resource, jif);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "socket buffer stats\n");
|
||||
/* for each connection ... once we have more than one */
|
||||
rcu_read_lock();
|
||||
if (connection->data.socket) {
|
||||
/* open coded SIOCINQ, the "relevant" part */
|
||||
struct tcp_sock *tp = tcp_sk(connection->data.socket->sk);
|
||||
int answ = tp->rcv_nxt - tp->copied_seq;
|
||||
seq_printf(m, "unread receive buffer: %u Byte\n", answ);
|
||||
/* open coded SIOCOUTQ, the "relevant" part */
|
||||
answ = tp->write_seq - tp->snd_una;
|
||||
seq_printf(m, "unacked send buffer: %u Byte\n", answ);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "oldest peer requests\n");
|
||||
seq_print_resource_pending_peer_requests(m, resource, jif);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "application requests waiting for activity log\n");
|
||||
seq_print_waiting_for_AL(m, resource, jif);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "oldest application requests\n");
|
||||
seq_print_resource_transfer_log_summary(m, resource, connection, jif);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
jif = jiffies - jif;
|
||||
if (jif)
|
||||
seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
|
||||
kref_put(&connection->kref, drbd_destroy_connection);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* simple_positive(file->f_dentry) respectively debugfs_positive(),
|
||||
* but neither is "reachable" from here.
|
||||
* So we have our own inline version of it above. :-( */
|
||||
static inline int debugfs_positive(struct dentry *dentry)
|
||||
{
|
||||
return dentry->d_inode && !d_unhashed(dentry);
|
||||
}
|
||||
|
||||
/* make sure at *open* time that the respective object won't go away. */
|
||||
static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
|
||||
void *data, struct kref *kref,
|
||||
void (*release)(struct kref *))
|
||||
{
|
||||
struct dentry *parent;
|
||||
int ret = -ESTALE;
|
||||
|
||||
/* Are we still linked,
|
||||
* or has debugfs_remove() already been called? */
|
||||
parent = file->f_dentry->d_parent;
|
||||
/* not sure if this can happen: */
|
||||
if (!parent || !parent->d_inode)
|
||||
goto out;
|
||||
/* serialize with d_delete() */
|
||||
mutex_lock(&parent->d_inode->i_mutex);
|
||||
/* Make sure the object is still alive */
|
||||
if (debugfs_positive(file->f_dentry)
|
||||
&& kref_get_unless_zero(kref))
|
||||
ret = 0;
|
||||
mutex_unlock(&parent->d_inode->i_mutex);
|
||||
if (!ret) {
|
||||
ret = single_open(file, show, data);
|
||||
if (ret)
|
||||
kref_put(kref, release);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int in_flight_summary_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_resource *resource = inode->i_private;
|
||||
return drbd_single_open(file, in_flight_summary_show, resource,
|
||||
&resource->kref, drbd_destroy_resource);
|
||||
}
|
||||
|
||||
static int in_flight_summary_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_resource *resource = inode->i_private;
|
||||
kref_put(&resource->kref, drbd_destroy_resource);
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
static const struct file_operations in_flight_summary_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = in_flight_summary_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = in_flight_summary_release,
|
||||
};
|
||||
|
||||
void drbd_debugfs_resource_add(struct drbd_resource *resource)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
if (!drbd_debugfs_resources)
|
||||
return;
|
||||
|
||||
dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
resource->debugfs_res = dentry;
|
||||
|
||||
dentry = debugfs_create_dir("volumes", resource->debugfs_res);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
resource->debugfs_res_volumes = dentry;
|
||||
|
||||
dentry = debugfs_create_dir("connections", resource->debugfs_res);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
resource->debugfs_res_connections = dentry;
|
||||
|
||||
dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
|
||||
resource->debugfs_res, resource,
|
||||
&in_flight_summary_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
resource->debugfs_res_in_flight_summary = dentry;
|
||||
return;
|
||||
|
||||
fail:
|
||||
drbd_debugfs_resource_cleanup(resource);
|
||||
drbd_err(resource, "failed to create debugfs dentry\n");
|
||||
}
|
||||
|
||||
static void drbd_debugfs_remove(struct dentry **dp)
|
||||
{
|
||||
debugfs_remove(*dp);
|
||||
*dp = NULL;
|
||||
}
|
||||
|
||||
void drbd_debugfs_resource_cleanup(struct drbd_resource *resource)
|
||||
{
|
||||
/* it is ok to call debugfs_remove(NULL) */
|
||||
drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary);
|
||||
drbd_debugfs_remove(&resource->debugfs_res_connections);
|
||||
drbd_debugfs_remove(&resource->debugfs_res_volumes);
|
||||
drbd_debugfs_remove(&resource->debugfs_res);
|
||||
}
|
||||
|
||||
static void seq_print_one_timing_detail(struct seq_file *m,
|
||||
const struct drbd_thread_timing_details *tdp,
|
||||
unsigned long now)
|
||||
{
|
||||
struct drbd_thread_timing_details td;
|
||||
/* No locking...
|
||||
* use temporary assignment to get at consistent data. */
|
||||
do {
|
||||
td = *tdp;
|
||||
} while (td.cb_nr != tdp->cb_nr);
|
||||
if (!td.cb_addr)
|
||||
return;
|
||||
seq_printf(m, "%u\t%d\t%s:%u\t%ps\n",
|
||||
td.cb_nr,
|
||||
jiffies_to_msecs(now - td.start_jif),
|
||||
td.caller_fn, td.line,
|
||||
td.cb_addr);
|
||||
}
|
||||
|
||||
static void seq_print_timing_details(struct seq_file *m,
|
||||
const char *title,
|
||||
unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now)
|
||||
{
|
||||
unsigned int start_idx;
|
||||
unsigned int i;
|
||||
|
||||
seq_printf(m, "%s\n", title);
|
||||
/* If not much is going on, this will result in natural ordering.
|
||||
* If it is very busy, we will possibly skip events, or even see wrap
|
||||
* arounds, which could only be avoided with locking.
|
||||
*/
|
||||
start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST;
|
||||
for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++)
|
||||
seq_print_one_timing_detail(m, tdp+i, now);
|
||||
for (i = 0; i < start_idx; i++)
|
||||
seq_print_one_timing_detail(m, tdp+i, now);
|
||||
}
|
||||
|
||||
static int callback_history_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_connection *connection = m->private;
|
||||
unsigned long jif = jiffies;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
seq_puts(m, "n\tage\tcallsite\tfn\n");
|
||||
seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif);
|
||||
seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int callback_history_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_connection *connection = inode->i_private;
|
||||
return drbd_single_open(file, callback_history_show, connection,
|
||||
&connection->kref, drbd_destroy_connection);
|
||||
}
|
||||
|
||||
static int callback_history_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_connection *connection = inode->i_private;
|
||||
kref_put(&connection->kref, drbd_destroy_connection);
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
static const struct file_operations connection_callback_history_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = callback_history_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = callback_history_release,
|
||||
};
|
||||
|
||||
static int connection_oldest_requests_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_connection *connection = m->private;
|
||||
unsigned long now = jiffies;
|
||||
struct drbd_request *r1, *r2;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
spin_lock_irq(&connection->resource->req_lock);
|
||||
r1 = connection->req_next;
|
||||
if (r1)
|
||||
seq_print_minor_vnr_req(m, r1, now);
|
||||
r2 = connection->req_ack_pending;
|
||||
if (r2 && r2 != r1) {
|
||||
r1 = r2;
|
||||
seq_print_minor_vnr_req(m, r1, now);
|
||||
}
|
||||
r2 = connection->req_not_net_done;
|
||||
if (r2 && r2 != r1)
|
||||
seq_print_minor_vnr_req(m, r2, now);
|
||||
spin_unlock_irq(&connection->resource->req_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int connection_oldest_requests_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_connection *connection = inode->i_private;
|
||||
return drbd_single_open(file, connection_oldest_requests_show, connection,
|
||||
&connection->kref, drbd_destroy_connection);
|
||||
}
|
||||
|
||||
static int connection_oldest_requests_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct drbd_connection *connection = inode->i_private;
|
||||
kref_put(&connection->kref, drbd_destroy_connection);
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
static const struct file_operations connection_oldest_requests_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = connection_oldest_requests_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = connection_oldest_requests_release,
|
||||
};
|
||||
|
||||
void drbd_debugfs_connection_add(struct drbd_connection *connection)
|
||||
{
|
||||
struct dentry *conns_dir = connection->resource->debugfs_res_connections;
|
||||
struct dentry *dentry;
|
||||
if (!conns_dir)
|
||||
return;
|
||||
|
||||
/* Once we enable mutliple peers,
|
||||
* these connections will have descriptive names.
|
||||
* For now, it is just the one connection to the (only) "peer". */
|
||||
dentry = debugfs_create_dir("peer", conns_dir);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
connection->debugfs_conn = dentry;
|
||||
|
||||
dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_callback_history_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
connection->debugfs_conn_callback_history = dentry;
|
||||
|
||||
dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_oldest_requests_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
connection->debugfs_conn_oldest_requests = dentry;
|
||||
return;
|
||||
|
||||
fail:
|
||||
drbd_debugfs_connection_cleanup(connection);
|
||||
drbd_err(connection, "failed to create debugfs dentry\n");
|
||||
}
|
||||
|
||||
void drbd_debugfs_connection_cleanup(struct drbd_connection *connection)
|
||||
{
|
||||
drbd_debugfs_remove(&connection->debugfs_conn_callback_history);
|
||||
drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests);
|
||||
drbd_debugfs_remove(&connection->debugfs_conn);
|
||||
}
|
||||
|
||||
static void resync_dump_detail(struct seq_file *m, struct lc_element *e)
|
||||
{
|
||||
struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
|
||||
|
||||
seq_printf(m, "%5d %s %s %s", bme->rs_left,
|
||||
test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------",
|
||||
test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------",
|
||||
test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------"
|
||||
);
|
||||
}
|
||||
|
||||
static int device_resync_extents_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
if (get_ldev_if_state(device, D_FAILED)) {
|
||||
lc_seq_printf_stats(m, device->resync);
|
||||
lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail);
|
||||
put_ldev(device);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int device_act_log_extents_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
if (get_ldev_if_state(device, D_FAILED)) {
|
||||
lc_seq_printf_stats(m, device->act_log);
|
||||
lc_seq_dump_details(m, device->act_log, "", NULL);
|
||||
put_ldev(device);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int device_oldest_requests_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
struct drbd_resource *resource = device->resource;
|
||||
unsigned long now = jiffies;
|
||||
struct drbd_request *r1, *r2;
|
||||
int i;
|
||||
|
||||
/* BUMP me if you change the file format/content/presentation */
|
||||
seq_printf(m, "v: %u\n\n", 0);
|
||||
|
||||
seq_puts(m, RQ_HDR);
|
||||
spin_lock_irq(&resource->req_lock);
|
||||
/* WRITE, then READ */
|
||||
for (i = 1; i >= 0; --i) {
|
||||
r1 = list_first_entry_or_null(&device->pending_master_completion[i],
|
||||
struct drbd_request, req_pending_master_completion);
|
||||
r2 = list_first_entry_or_null(&device->pending_completion[i],
|
||||
struct drbd_request, req_pending_local);
|
||||
if (r1)
|
||||
seq_print_one_request(m, r1, now);
|
||||
if (r2 && r2 != r1)
|
||||
seq_print_one_request(m, r2, now);
|
||||
}
|
||||
spin_unlock_irq(&resource->req_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int device_data_gen_id_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
struct drbd_md *md;
|
||||
enum drbd_uuid_index idx;
|
||||
|
||||
if (!get_ldev_if_state(device, D_FAILED))
|
||||
return -ENODEV;
|
||||
|
||||
md = &device->ldev->md;
|
||||
spin_lock_irq(&md->uuid_lock);
|
||||
for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) {
|
||||
seq_printf(m, "0x%016llX\n", md->uuid[idx]);
|
||||
}
|
||||
spin_unlock_irq(&md->uuid_lock);
|
||||
put_ldev(device);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Generate the open/release handlers and the file_operations for one
 * per-volume debugfs attribute "name".  The matching device_<name>_show()
 * must already be defined.  open takes a reference on the device kref (via
 * drbd_single_open), release drops it again, so the device cannot go away
 * while the file is open.
 */
#define drbd_debugfs_device_attr(name)						\
static int device_ ## name ## _open(struct inode *inode, struct file *file)	\
{										\
	struct drbd_device *device = inode->i_private;				\
	return drbd_single_open(file, device_ ## name ## _show, device,		\
				&device->kref, drbd_destroy_device);		\
}										\
static int device_ ## name ## _release(struct inode *inode, struct file *file)	\
{										\
	struct drbd_device *device = inode->i_private;				\
	kref_put(&device->kref, drbd_destroy_device);				\
	return single_release(inode, file);					\
}										\
static const struct file_operations device_ ## name ## _fops = {		\
	.owner		= THIS_MODULE,						\
	.open		= device_ ## name ## _open,				\
	.read		= seq_read,						\
	.llseek		= seq_lseek,						\
	.release	= device_ ## name ## _release,				\
};

/* Instantiate the fops for each per-volume debugfs attribute. */
drbd_debugfs_device_attr(oldest_requests)
drbd_debugfs_device_attr(act_log_extents)
drbd_debugfs_device_attr(resync_extents)
drbd_debugfs_device_attr(data_gen_id)
|
||||
|
||||
/*
 * Create the debugfs entries for one volume: a directory named after the
 * volume number under the resource's "volumes" directory, a symlink from
 * the global "minors" directory to it, and one file per attribute.
 * On any failure, all entries created so far are removed again and an
 * error is logged; the function itself returns void.
 */
void drbd_debugfs_device_add(struct drbd_device *device)
{
	struct dentry *vols_dir = device->resource->debugfs_res_volumes;
	char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */
	char vnr_buf[8];   /* volume number vnr is even 16 bit only; */
	char *slink_name = NULL;

	struct dentry *dentry;
	/* parent directories must exist; otherwise debugfs is unavailable */
	if (!vols_dir || !drbd_debugfs_minors)
		return;

	snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr);
	dentry = debugfs_create_dir(vnr_buf, vols_dir);
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	device->debugfs_vol = dentry;

	/* "minors/<minor>" -> "../resources/<name>/volumes/<vnr>" */
	snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor);
	slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u",
			device->resource->name, device->vnr);
	if (!slink_name)
		goto fail;
	dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name);
	kfree(slink_name);
	slink_name = NULL;
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	device->debugfs_minor = dentry;

/* create one read-only attribute file and remember its dentry */
#define DCF(name)	do {						\
	dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP,		\
			device->debugfs_vol, device,			\
			&device_ ## name ## _fops);			\
	if (IS_ERR_OR_NULL(dentry))					\
		goto fail;						\
	device->debugfs_vol_ ## name = dentry;				\
	} while (0)

	DCF(oldest_requests);
	DCF(act_log_extents);
	DCF(resync_extents);
	DCF(data_gen_id);
#undef DCF
	return;

fail:
	drbd_debugfs_device_cleanup(device);
	drbd_err(device, "failed to create debugfs entries\n");
}
|
||||
|
||||
/*
 * Remove all debugfs entries of one volume.  Safe to call on a partially
 * created set (drbd_debugfs_remove tolerates NULL dentries and clears the
 * pointers).  The files are removed before their directory.
 */
void drbd_debugfs_device_cleanup(struct drbd_device *device)
{
	drbd_debugfs_remove(&device->debugfs_minor);
	drbd_debugfs_remove(&device->debugfs_vol_oldest_requests);
	drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
	drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
	drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
	drbd_debugfs_remove(&device->debugfs_vol);
}
|
||||
|
||||
void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct dentry *conn_dir = peer_device->connection->debugfs_conn;
|
||||
struct dentry *dentry;
|
||||
char vnr_buf[8];
|
||||
|
||||
if (!conn_dir)
|
||||
return;
|
||||
|
||||
snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr);
|
||||
dentry = debugfs_create_dir(vnr_buf, conn_dir);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
peer_device->debugfs_peer_dev = dentry;
|
||||
return;
|
||||
|
||||
fail:
|
||||
drbd_debugfs_peer_device_cleanup(peer_device);
|
||||
drbd_err(peer_device, "failed to create debugfs entries\n");
|
||||
}
|
||||
|
||||
/* Remove the per-peer-device debugfs directory (NULL-safe). */
void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device)
{
	drbd_debugfs_remove(&peer_device->debugfs_peer_dev);
}
|
||||
|
||||
/*
 * Contents of the debugfs "version" file: build tag plus driver, netlink
 * API and wire protocol versions, in KEY=value form.
 */
static int drbd_version_show(struct seq_file *m, void *ignored)
{
	seq_printf(m, "# %s\n", drbd_buildtag());
	seq_printf(m, "VERSION=%s\n", REL_VERSION);
	seq_printf(m, "API_VERSION=%u\n", API_VERSION);
	seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
	seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
	return 0;
}
|
||||
|
||||
/* open handler for the "version" file; plain single_open, no private data */
static int drbd_version_open(struct inode *inode, struct file *file)
{
	return single_open(file, drbd_version_show, NULL);
}
|
||||
|
||||
static struct file_operations drbd_version_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = drbd_version_open,
|
||||
.llseek = seq_lseek,
|
||||
.read = seq_read,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
/* not __exit, may be indirectly called
 * from the module-load-failure path as well. */
void drbd_debugfs_cleanup(void)
{
	/* children before the root directory; NULL-safe for partial setups */
	drbd_debugfs_remove(&drbd_debugfs_resources);
	drbd_debugfs_remove(&drbd_debugfs_minors);
	drbd_debugfs_remove(&drbd_debugfs_version);
	drbd_debugfs_remove(&drbd_debugfs_root);
}
|
||||
|
||||
/*
 * Create the static debugfs hierarchy:
 *   drbd/version, drbd/resources/, drbd/minors/
 * Returns 0 on success, the PTR_ERR of the failing debugfs call, or
 * -EINVAL if debugfs returned NULL.  Partially created entries are torn
 * down again before returning an error.
 */
int __init drbd_debugfs_init(void)
{
	struct dentry *dentry;

	dentry = debugfs_create_dir("drbd", NULL);
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	drbd_debugfs_root = dentry;

	dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops);
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	drbd_debugfs_version = dentry;

	dentry = debugfs_create_dir("resources", drbd_debugfs_root);
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	drbd_debugfs_resources = dentry;

	dentry = debugfs_create_dir("minors", drbd_debugfs_root);
	if (IS_ERR_OR_NULL(dentry))
		goto fail;
	drbd_debugfs_minors = dentry;
	return 0;

fail:
	drbd_debugfs_cleanup();
	/* at "fail", dentry is either an ERR_PTR or NULL (debugfs disabled) */
	if (dentry)
		return PTR_ERR(dentry);
	else
		return -EINVAL;
}
|
39
drivers/block/drbd/drbd_debugfs.h
Normal file
39
drivers/block/drbd/drbd_debugfs.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* drbd_debugfs.h - debugfs interface of the DRBD driver; all hooks become
 * no-ops when CONFIG_DEBUG_FS is not set. */
#ifndef __DRBD_DEBUGFS_H
#define __DRBD_DEBUGFS_H
/* include guard added for consistency with __DRBD_INTERVAL_H / __DRBD_NLA_H */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/debugfs.h>

#include "drbd_int.h"

#ifdef CONFIG_DEBUG_FS
int __init drbd_debugfs_init(void);
void drbd_debugfs_cleanup(void);

void drbd_debugfs_resource_add(struct drbd_resource *resource);
void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);

void drbd_debugfs_connection_add(struct drbd_connection *connection);
void drbd_debugfs_connection_cleanup(struct drbd_connection *connection);

void drbd_debugfs_device_add(struct drbd_device *device);
void drbd_debugfs_device_cleanup(struct drbd_device *device);

void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device);
void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device);
#else

static inline int __init drbd_debugfs_init(void) { return -ENODEV; }
static inline void drbd_debugfs_cleanup(void) { }

static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }

static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { }
static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { }

static inline void drbd_debugfs_device_add(struct drbd_device *device) { }
static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { }

static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { }
static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { }

#endif

#endif /* __DRBD_DEBUGFS_H */
|
2351
drivers/block/drbd/drbd_int.h
Normal file
2351
drivers/block/drbd/drbd_int.h
Normal file
File diff suppressed because it is too large
Load diff
179
drivers/block/drbd/drbd_interval.c
Normal file
179
drivers/block/drbd/drbd_interval.c
Normal file
|
@ -0,0 +1,179 @@
|
|||
#include <asm/bug.h>
|
||||
#include <linux/rbtree_augmented.h>
|
||||
#include "drbd_interval.h"
|
||||
|
||||
/**
 * interval_end - return end of @node
 * @node: rb-tree node embedded in a struct drbd_interval
 *
 * Returns the cached "highest interval end in this subtree" value
 * (drbd_interval.end), maintained by the augmented rb-tree callbacks.
 */
static inline
sector_t interval_end(struct rb_node *node)
{
	struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
	return this->end;
}
|
||||
|
||||
/**
|
||||
* compute_subtree_last - compute end of @node
|
||||
*
|
||||
* The end of an interval is the highest (start + (size >> 9)) value of this
|
||||
* node and of its children. Called for @node and its parents whenever the end
|
||||
* may have changed.
|
||||
*/
|
||||
static inline sector_t
|
||||
compute_subtree_last(struct drbd_interval *node)
|
||||
{
|
||||
sector_t max = node->sector + (node->size >> 9);
|
||||
|
||||
if (node->rb.rb_left) {
|
||||
sector_t left = interval_end(node->rb.rb_left);
|
||||
if (left > max)
|
||||
max = left;
|
||||
}
|
||||
if (node->rb.rb_right) {
|
||||
sector_t right = interval_end(node->rb.rb_right);
|
||||
if (right > max)
|
||||
max = right;
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
/* Generate the augmented rb-tree callbacks that keep the per-subtree
 * maximum "end" up to date on insert/erase/rotation. */
RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
		     sector_t, end, compute_subtree_last);
|
||||
|
||||
/**
 * drbd_insert_interval - insert a new interval into a tree
 * @root: interval tree to insert into
 * @this: interval to insert; ->sector and ->size must be set, ->size must
 *	  be a multiple of 512 bytes
 *
 * Returns true on success, false if @this is already in the tree (same
 * start sector and same object).  Intervals starting at the same sector
 * are ordered by object address.
 */
bool
drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;
	sector_t this_end = this->sector + (this->size >> 9);

	BUG_ON(!IS_ALIGNED(this->size, 512));

	while (*new) {
		struct drbd_interval *here =
			rb_entry(*new, struct drbd_interval, rb);

		parent = *new;
		/* maintain the augmented "end" on the way down */
		if (here->end < this_end)
			here->end = this_end;
		if (this->sector < here->sector)
			new = &(*new)->rb_left;
		else if (this->sector > here->sector)
			new = &(*new)->rb_right;
		else if (this < here)
			new = &(*new)->rb_left;
		else if (this > here)
			new = &(*new)->rb_right;
		else
			return false;
	}

	this->end = this_end;
	rb_link_node(&this->rb, parent, new);
	rb_insert_augmented(&this->rb, root, &augment_callbacks);
	return true;
}
|
||||
|
||||
/**
|
||||
* drbd_contains_interval - check if a tree contains a given interval
|
||||
* @sector: start sector of @interval
|
||||
* @interval: may not be a valid pointer
|
||||
*
|
||||
* Returns if the tree contains the node @interval with start sector @start.
|
||||
* Does not dereference @interval until @interval is known to be a valid object
|
||||
* in @tree. Returns %false if @interval is in the tree but with a different
|
||||
* sector number.
|
||||
*/
|
||||
bool
|
||||
drbd_contains_interval(struct rb_root *root, sector_t sector,
|
||||
struct drbd_interval *interval)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(node, struct drbd_interval, rb);
|
||||
|
||||
if (sector < here->sector)
|
||||
node = node->rb_left;
|
||||
else if (sector > here->sector)
|
||||
node = node->rb_right;
|
||||
else if (interval < here)
|
||||
node = node->rb_left;
|
||||
else if (interval > here)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * drbd_remove_interval - remove an interval from a tree
 * @root: interval tree
 * @this: interval to remove; must be in the tree
 */
void
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
{
	rb_erase_augmented(&this->rb, root, &augment_callbacks);
}
|
||||
|
||||
/**
 * drbd_find_overlap - search for an interval overlapping with [sector, sector + size)
 * @sector: start sector
 * @size: size, aligned to 512 bytes
 *
 * Returns an interval overlapping with [sector, sector + size), or NULL if
 * there is none.  When there is more than one overlapping interval in the
 * tree, the interval with the lowest start sector is returned, and all other
 * overlapping intervals will be on the right side of the tree, reachable with
 * rb_next().
 */
struct drbd_interval *
drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
{
	struct rb_node *node = root->rb_node;
	struct drbd_interval *overlap = NULL;
	sector_t end = sector + (size >> 9);

	BUG_ON(!IS_ALIGNED(size, 512));

	while (node) {
		struct drbd_interval *here =
			rb_entry(node, struct drbd_interval, rb);

		/* the augmented "end" tells whether the left subtree can
		 * contain an overlap at all */
		if (node->rb_left &&
		    sector < interval_end(node->rb_left)) {
			/* Overlap if any must be on left side */
			node = node->rb_left;
		} else if (here->sector < end &&
			   sector < here->sector + (here->size >> 9)) {
			overlap = here;
			break;
		} else if (sector >= here->sector) {
			/* Overlap if any must be on right side */
			node = node->rb_right;
		} else
			break;
	}
	return overlap;
}
|
||||
|
||||
struct drbd_interval *
|
||||
drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
|
||||
{
|
||||
sector_t end = sector + (size >> 9);
|
||||
struct rb_node *node;
|
||||
|
||||
for (;;) {
|
||||
node = rb_next(&i->rb);
|
||||
if (!node)
|
||||
return NULL;
|
||||
i = rb_entry(node, struct drbd_interval, rb);
|
||||
if (i->sector >= end)
|
||||
return NULL;
|
||||
if (sector < i->sector + (i->size >> 9))
|
||||
return i;
|
||||
}
|
||||
}
|
42
drivers/block/drbd/drbd_interval.h
Normal file
42
drivers/block/drbd/drbd_interval.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#ifndef __DRBD_INTERVAL_H
#define __DRBD_INTERVAL_H

#include <linux/types.h>
#include <linux/rbtree.h>

/*
 * One node of an augmented interval rb-tree; the tree keeps, per subtree,
 * the highest interval end, so overlap searches can prune whole subtrees.
 */
struct drbd_interval {
	struct rb_node rb;
	sector_t sector;		/* start sector of the interval */
	unsigned int size;		/* size in bytes */
	sector_t end;			/* highest interval end in subtree */
	/* unsigned: a signed 1-bit bit-field can only hold 0 and -1,
	 * which makes comparisons against 1 silently wrong */
	unsigned int local:1;		/* local or remote request? */
	unsigned int waiting:1;		/* someone is waiting for this to complete */
	unsigned int completed:1;	/* this has been completed already;
					 * ignore for conflict detection */
};

/* Mark @i as not being in any tree. */
static inline void drbd_clear_interval(struct drbd_interval *i)
{
	RB_CLEAR_NODE(&i->rb);
}

/* True if @i is not in any tree. */
static inline bool drbd_interval_empty(struct drbd_interval *i)
{
	return RB_EMPTY_NODE(&i->rb);
}

extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
extern bool drbd_contains_interval(struct rb_root *, sector_t,
		struct drbd_interval *);
extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
		unsigned int);
extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
		unsigned int);

/* Iterate over all intervals overlapping [sector, sector + size). */
#define drbd_for_each_overlap(i, root, sector, size)		\
	for (i = drbd_find_overlap(root, sector, size);		\
	     i;							\
	     i = drbd_next_overlap(i, sector, size))

#endif	/* __DRBD_INTERVAL_H */
|
3856
drivers/block/drbd/drbd_main.c
Normal file
3856
drivers/block/drbd/drbd_main.c
Normal file
File diff suppressed because it is too large
Load diff
3682
drivers/block/drbd/drbd_nl.c
Normal file
3682
drivers/block/drbd/drbd_nl.c
Normal file
File diff suppressed because it is too large
Load diff
54
drivers/block/drbd/drbd_nla.c
Normal file
54
drivers/block/drbd/drbd_nla.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <net/netlink.h>
|
||||
#include <linux/drbd_genl_api.h>
|
||||
#include "drbd_nla.h"
|
||||
|
||||
/*
 * Strip the DRBD_GENLA_F_MANDATORY flag from all nested attributes of @nla,
 * in place.  Returns -EOPNOTSUPP if a flagged attribute has a type beyond
 * @maxtype (i.e. a mandatory attribute we do not understand), 0 otherwise.
 */
static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
{
	struct nlattr *head = nla_data(nla);
	int len = nla_len(nla);
	int rem;

	/*
	 * validate_nla (called from nla_parse_nested) ignores attributes
	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
	 * flag set also, check and remove that flag before calling
	 * nla_parse_nested.
	 */

	nla_for_each_attr(nla, head, len, rem) {
		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
			if (nla_type(nla) > maxtype)
				return -EOPNOTSUPP;
		}
	}
	return 0;
}
|
||||
|
||||
/*
 * Like nla_parse_nested(), but first validates and strips DRBD's
 * DRBD_GENLA_F_MANDATORY flag from the nested attributes.
 * Returns 0 on success or a negative error code.
 */
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
			  const struct nla_policy *policy)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (err)
		return err;
	return nla_parse_nested(tb, maxtype, nla, policy);
}
|
||||
|
||||
/*
 * Like nla_find_nested(), but honours DRBD_GENLA_F_MANDATORY.
 * Returns the attribute of type @attrtype, NULL if not present, or an
 * ERR_PTR if an unknown mandatory attribute was found.
 */
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
{
	int err;
	/*
	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
	 * we don't know about that attribute, reject all the nested
	 * attributes.
	 */
	err = drbd_nla_check_mandatory(maxtype, nla);
	if (err)
		return ERR_PTR(err);
	return nla_find_nested(nla, attrtype);
}
|
8
drivers/block/drbd/drbd_nla.h
Normal file
8
drivers/block/drbd/drbd_nla.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
#ifndef __DRBD_NLA_H
#define __DRBD_NLA_H

/* DRBD_GENLA_F_MANDATORY-aware wrappers around nla_parse_nested() and
 * nla_find_nested(); see drbd_nla.c for semantics. */
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
				 const struct nla_policy *policy);
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);

#endif  /* __DRBD_NLA_H */
|
368
drivers/block/drbd/drbd_proc.c
Normal file
368
drivers/block/drbd/drbd_proc.c
Normal file
|
@ -0,0 +1,368 @@
|
|||
/*
|
||||
drbd_proc.c
|
||||
|
||||
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
|
||||
|
||||
Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
|
||||
Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
|
||||
Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
|
||||
|
||||
drbd is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
drbd is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with drbd; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/drbd.h>
|
||||
#include "drbd_int.h"
|
||||
|
||||
/* forward declarations: the fops table is defined before the handlers */
static int drbd_proc_open(struct inode *inode, struct file *file);
static int drbd_proc_release(struct inode *inode, struct file *file);


/* /proc/drbd entry, registered by the module init code */
struct proc_dir_entry *drbd_proc;
const struct file_operations drbd_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= drbd_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= drbd_proc_release,
};
|
||||
|
||||
/*
 * Print @v with a comma as thousands separator, e.g. 6345 -> "6,345".
 * v is in kB/sec.  We don't expect TiByte/sec yet, so at most two
 * separators (GiByte/s range) are emitted.
 */
static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
	if (v < 1000) {
		seq_printf(seq, "%ld", v);
		return;
	}
	if (v < 1000000) {
		seq_printf(seq, "%ld,%03ld", v / 1000, v % 1000);
		return;
	}
	/* cool: > GiByte/s */
	seq_printf(seq, "%ld,", v / 1000000);
	v %= 1000000;
	seq_printf(seq, "%03ld,%03ld", v / 1000, v % 1000);
}
|
||||
|
||||
/*
 * Compute resync/verify progress for @device: total bits, bits still to do,
 * and completed per-mille.  All bit counts are in units of BM_BLOCK_SIZE.
 */
static void drbd_get_syncer_progress(struct drbd_device *device,
		union drbd_dev_state state, unsigned long *rs_total,
		unsigned long *bits_left, unsigned int *per_mil_done)
{
	/* this is to break it at compile time when we change that, in case we
	 * want to support more than (1<<32) bits on a 32bit arch. */
	typecheck(unsigned long, device->rs_total);
	*rs_total = device->rs_total;

	/* note: both rs_total and rs_left are in bits, i.e. in
	 * units of BM_BLOCK_SIZE.
	 * for the percentage, we don't care. */

	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
		*bits_left = device->ov_left;
	else
		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
	/* >> 10 to prevent overflow,
	 * +1 to prevent division by zero */
	if (*bits_left > *rs_total) {
		/* D'oh. Maybe a logic bug somewhere. More likely just a race
		 * between state change and reset of rs_total.
		 */
		*bits_left = *rs_total;
		*per_mil_done = *rs_total ? 0 : 1000;
	} else {
		/* Make sure the division happens in long context.
		 * We allow up to one petabyte storage right now,
		 * at a granularity of 4k per bit that is 2**38 bits.
		 * After shift right and multiplication by 1000,
		 * this should still fit easily into a 32bit long,
		 * so we don't need a 64bit division on 32bit arch.
		 * Note: currently we don't support such large bitmaps on 32bit
		 * arch anyways, but no harm done to be prepared for it here.
		 */
		unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
		unsigned long left = *bits_left >> shift;
		unsigned long total = 1UL + (*rs_total >> shift);
		unsigned long tmp = 1000UL - left * 1000UL/total;
		*per_mil_done = tmp;
	}
}
|
||||
|
||||
|
||||
/*
 * progress bars shamelessly adapted from driver/md/md.c
 * output looks like
 *  [=====>..............] 33.5% (23456/123456)
 *        finish: 2:20:20 speed: 6,345 (6,456) K/sec
 */
static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
		union drbd_dev_state state)
{
	unsigned long db, dt, dbdt, rt, rs_total, rs_left;
	unsigned int res;
	int i, x, y;
	int stalled = 0;

	drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);

	/* res is per-mille done; draw a 20-character bar */
	x = res/50;
	y = 20-x;
	seq_printf(seq, "\t[");
	for (i = 1; i < x; i++)
		seq_printf(seq, "=");
	seq_printf(seq, ">");
	for (i = 0; i < y; i++)
		seq_printf(seq, ".");
	seq_printf(seq, "] ");

	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
		seq_printf(seq, "verified:");
	else
		seq_printf(seq, "sync'ed:");
	seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);

	/* if more than a few GB, display in MB */
	if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
		seq_printf(seq, "(%lu/%lu)M",
			    (unsigned long) Bit2KB(rs_left >> 10),
			    (unsigned long) Bit2KB(rs_total >> 10));
	else
		seq_printf(seq, "(%lu/%lu)K",
			    (unsigned long) Bit2KB(rs_left),
			    (unsigned long) Bit2KB(rs_total));

	seq_printf(seq, "\n\t");

	/* see drivers/md/md.c
	 * We do not want to overflow, so the order of operands and
	 * the * 100 / 100 trick are important. We do a +1 to be
	 * safe against division by zero. We only estimate anyway.
	 *
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 */
	/* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
	 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
	 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
	/* ------------------------ ~18s average ------------------------ */
	i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
	dt = (jiffies - device->rs_mark_time[i]) / HZ;
	if (dt > 180)
		stalled = 1;

	if (!dt)
		dt++;
	db = device->rs_mark_left[i] - rs_left;
	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */

	seq_printf(seq, "finish: %lu:%02lu:%02lu",
		rt / 3600, (rt % 3600) / 60, rt % 60);

	dbdt = Bit2KB(db/dt);
	seq_printf(seq, " speed: ");
	seq_printf_with_thousands_grouping(seq, dbdt);
	seq_printf(seq, " (");
	/* ------------------------- ~3s average ------------------------ */
	if (proc_details >= 1) {
		/* this is what drbd_rs_should_slow_down() uses */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
		dt = (jiffies - device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);
		seq_printf_with_thousands_grouping(seq, dbdt);
		seq_printf(seq, " -- ");
	}

	/* --------------------- long term average ---------------------- */
	/* mean speed since syncer started
	 * we do account for PausedSync periods */
	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt == 0)
		dt = 1;
	db = rs_total - rs_left;
	dbdt = Bit2KB(db/dt);
	seq_printf_with_thousands_grouping(seq, dbdt);
	seq_printf(seq, ")");

	if (state.conn == C_SYNC_TARGET ||
	    state.conn == C_VERIFY_S) {
		seq_printf(seq, " want: ");
		seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
	}
	seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");

	if (proc_details >= 1) {
		/* 64 bit:
		 * we convert to sectors in the display below. */
		unsigned long bm_bits = drbd_bm_bits(device);
		unsigned long bit_pos;
		unsigned long long stop_sector = 0;
		if (state.conn == C_VERIFY_S ||
		    state.conn == C_VERIFY_T) {
			bit_pos = bm_bits - device->ov_left;
			if (verify_can_do_stop_sector(device))
				stop_sector = device->ov_stop_sector;
		} else
			bit_pos = device->bm_resync_fo;
		/* Total sectors may be slightly off for oddly
		 * sized devices. So what. */
		seq_printf(seq,
			"\t%3d%% sector pos: %llu/%llu",
			(int)(bit_pos / (bm_bits/100+1)),
			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
			seq_printf(seq, " stop sector: %llu", stop_sector);
		seq_printf(seq, "\n");
	}
}
|
||||
|
||||
/*
 * seq_file "show" callback for /proc/drbd: version banner, then one status
 * block per configured device, with optional resync progress and, at higher
 * proc_details levels, lru-cache statistics.
 */
static int drbd_seq_show(struct seq_file *seq, void *v)
{
	int i, prev_i = -1;
	const char *sn;
	struct drbd_device *device;
	struct net_conf *nc;
	union drbd_dev_state state;
	char wp;

	/* one-character mnemonics for the resource's write ordering mode */
	static char write_ordering_chars[] = {
		[WO_none] = 'n',
		[WO_drain_io] = 'd',
		[WO_bdev_flush] = 'f',
	};

	seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
		   API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());

	/*
	  cs .. connection state
	  ro .. node role (local/remote)
	  ds .. disk state (local/remote)
	     protocol
	     various flags
	  ns .. network send
	  nr .. network receive
	  dw .. disk write
	  dr .. disk read
	  al .. activity log write count
	  bm .. bitmap update write count
	  pe .. pending (waiting for ack or data reply)
	  ua .. unack'd (still need to send ack or data reply)
	  ap .. application requests accepted, but not yet completed
	  ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
	  wo .. write ordering mode currently in use
	 oos .. known out-of-sync kB
	*/

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, device, i) {
		/* blank line between non-contiguous minor numbers */
		if (prev_i != i - 1)
			seq_printf(seq, "\n");
		prev_i = i;

		state = device->state;
		sn = drbd_conn_str(state.conn);

		if (state.conn == C_STANDALONE &&
		    state.disk == D_DISKLESS &&
		    state.role == R_SECONDARY) {
			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
		} else {
			/* reset device->congestion_reason */
			bdi_rw_congested(&device->rq_queue->backing_dev_info);

			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
			seq_printf(seq,
			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
			   i, sn,
			   drbd_role_str(state.role),
			   drbd_role_str(state.peer),
			   drbd_disk_str(state.disk),
			   drbd_disk_str(state.pdsk),
			   wp,
			   drbd_suspended(device) ? 's' : 'r',
			   state.aftr_isp ? 'a' : '-',
			   state.peer_isp ? 'p' : '-',
			   state.user_isp ? 'u' : '-',
			   device->congestion_reason ?: '-',
			   test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
			   device->send_cnt/2,
			   device->recv_cnt/2,
			   device->writ_cnt/2,
			   device->read_cnt/2,
			   device->al_writ_cnt,
			   device->bm_writ_cnt,
			   atomic_read(&device->local_cnt),
			   atomic_read(&device->ap_pending_cnt) +
			   atomic_read(&device->rs_pending_cnt),
			   atomic_read(&device->unacked_cnt),
			   atomic_read(&device->ap_bio_cnt),
			   first_peer_device(device)->connection->epochs,
			   write_ordering_chars[device->resource->write_ordering]
			);
			seq_printf(seq, " oos:%llu\n",
				   Bit2KB((unsigned long long)
					   drbd_bm_total_weight(device)));
		}
		if (state.conn == C_SYNC_SOURCE ||
		    state.conn == C_SYNC_TARGET ||
		    state.conn == C_VERIFY_S ||
		    state.conn == C_VERIFY_T)
			drbd_syncer_progress(device, seq, state);

		if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
			lc_seq_printf_stats(seq, device->resync);
			lc_seq_printf_stats(seq, device->act_log);
			put_ldev(device);
		}

		if (proc_details >= 2)
			seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
	}
	rcu_read_unlock();

	return 0;
}
|
||||
|
||||
static int drbd_proc_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (try_module_get(THIS_MODULE)) {
|
||||
err = single_open(file, drbd_seq_show, NULL);
|
||||
if (err)
|
||||
module_put(THIS_MODULE);
|
||||
return err;
|
||||
}
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* release handler for /proc/drbd: drop the module reference taken in
 * drbd_proc_open, then tear down the seq_file state */
static int drbd_proc_release(struct inode *inode, struct file *file)
{
	module_put(THIS_MODULE);
	return single_release(inode, file);
}
|
||||
|
||||
/* PROC FS stuff end */
|
307
drivers/block/drbd/drbd_protocol.h
Normal file
307
drivers/block/drbd/drbd_protocol.h
Normal file
|
@ -0,0 +1,307 @@
|
|||
#ifndef __DRBD_PROTOCOL_H
|
||||
#define __DRBD_PROTOCOL_H
|
||||
|
||||
enum drbd_packet {
|
||||
/* receiver (data socket) */
|
||||
P_DATA = 0x00,
|
||||
P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */
|
||||
P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */
|
||||
P_BARRIER = 0x03,
|
||||
P_BITMAP = 0x04,
|
||||
P_BECOME_SYNC_TARGET = 0x05,
|
||||
P_BECOME_SYNC_SOURCE = 0x06,
|
||||
P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */
|
||||
P_DATA_REQUEST = 0x08, /* Used to ask for a data block */
|
||||
P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */
|
||||
P_SYNC_PARAM = 0x0a,
|
||||
P_PROTOCOL = 0x0b,
|
||||
P_UUIDS = 0x0c,
|
||||
P_SIZES = 0x0d,
|
||||
P_STATE = 0x0e,
|
||||
P_SYNC_UUID = 0x0f,
|
||||
P_AUTH_CHALLENGE = 0x10,
|
||||
P_AUTH_RESPONSE = 0x11,
|
||||
P_STATE_CHG_REQ = 0x12,
|
||||
|
||||
/* asender (meta socket */
|
||||
P_PING = 0x13,
|
||||
P_PING_ACK = 0x14,
|
||||
P_RECV_ACK = 0x15, /* Used in protocol B */
|
||||
P_WRITE_ACK = 0x16, /* Used in protocol C */
|
||||
P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
|
||||
P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */
|
||||
P_NEG_ACK = 0x19, /* Sent if local disk is unusable */
|
||||
P_NEG_DREPLY = 0x1a, /* Local disk is broken... */
|
||||
P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */
|
||||
P_BARRIER_ACK = 0x1c,
|
||||
P_STATE_CHG_REPLY = 0x1d,
|
||||
|
||||
/* "new" commands, no longer fitting into the ordering scheme above */
|
||||
|
||||
P_OV_REQUEST = 0x1e, /* data socket */
|
||||
P_OV_REPLY = 0x1f,
|
||||
P_OV_RESULT = 0x20, /* meta socket */
|
||||
P_CSUM_RS_REQUEST = 0x21, /* data socket */
|
||||
P_RS_IS_IN_SYNC = 0x22, /* meta socket */
|
||||
P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
|
||||
P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */
|
||||
/* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
|
||||
/* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
|
||||
P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
|
||||
P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
|
||||
P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
|
||||
P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */
|
||||
P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */
|
||||
P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */
|
||||
P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
|
||||
/* 0x2e to 0x30 reserved, used in drbd 9 */
|
||||
|
||||
/* REQ_DISCARD. We used "discard" in different contexts before,
|
||||
* which is why I chose TRIM here, to disambiguate. */
|
||||
P_TRIM = 0x31,
|
||||
|
||||
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
|
||||
P_MAX_OPT_CMD = 0x101,
|
||||
|
||||
/* special command ids for handshake */
|
||||
|
||||
P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */
|
||||
P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */
|
||||
|
||||
P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! */
|
||||
};
|
||||
|
||||
#ifndef __packed
|
||||
#define __packed __attribute__((packed))
|
||||
#endif
|
||||
|
||||
/* This is the layout for a packet on the wire.
|
||||
* The byteorder is the network byte order.
|
||||
* (except block_id and barrier fields.
|
||||
* these are pointers to local structs
|
||||
* and have no relevance for the partner,
|
||||
* which just echoes them as received.)
|
||||
*
|
||||
* NOTE that the payload starts at a long aligned offset,
|
||||
* regardless of 32 or 64 bit arch!
|
||||
*/
|
||||
struct p_header80 {
|
||||
u32 magic;
|
||||
u16 command;
|
||||
u16 length; /* bytes of data after this header */
|
||||
} __packed;
|
||||
|
||||
/* Header for big packets, Used for data packets exceeding 64kB */
|
||||
struct p_header95 {
|
||||
u16 magic; /* use DRBD_MAGIC_BIG here */
|
||||
u16 command;
|
||||
u32 length;
|
||||
} __packed;
|
||||
|
||||
struct p_header100 {
|
||||
u32 magic;
|
||||
u16 volume;
|
||||
u16 command;
|
||||
u32 length;
|
||||
u32 pad;
|
||||
} __packed;
|
||||
|
||||
/* these defines must not be changed without changing the protocol version */
|
||||
#define DP_HARDBARRIER 1 /* depricated */
|
||||
#define DP_RW_SYNC 2 /* equals REQ_SYNC */
|
||||
#define DP_MAY_SET_IN_SYNC 4
|
||||
#define DP_UNPLUG 8 /* not used anymore */
|
||||
#define DP_FUA 16 /* equals REQ_FUA */
|
||||
#define DP_FLUSH 32 /* equals REQ_FLUSH */
|
||||
#define DP_DISCARD 64 /* equals REQ_DISCARD */
|
||||
#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
|
||||
#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
|
||||
|
||||
struct p_data {
|
||||
u64 sector; /* 64 bits sector number */
|
||||
u64 block_id; /* to identify the request in protocol B&C */
|
||||
u32 seq_num;
|
||||
u32 dp_flags;
|
||||
} __packed;
|
||||
|
||||
struct p_trim {
|
||||
struct p_data p_data;
|
||||
u32 size; /* == bio->bi_size */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* commands which share a struct:
|
||||
* p_block_ack:
|
||||
* P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
|
||||
* P_SUPERSEDED (proto C, two-primaries conflict detection)
|
||||
* p_block_req:
|
||||
* P_DATA_REQUEST, P_RS_DATA_REQUEST
|
||||
*/
|
||||
struct p_block_ack {
|
||||
u64 sector;
|
||||
u64 block_id;
|
||||
u32 blksize;
|
||||
u32 seq_num;
|
||||
} __packed;
|
||||
|
||||
struct p_block_req {
|
||||
u64 sector;
|
||||
u64 block_id;
|
||||
u32 blksize;
|
||||
u32 pad; /* to multiple of 8 Byte */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* commands with their own struct for additional fields:
|
||||
* P_CONNECTION_FEATURES
|
||||
* P_BARRIER
|
||||
* P_BARRIER_ACK
|
||||
* P_SYNC_PARAM
|
||||
* ReportParams
|
||||
*/
|
||||
|
||||
#define FF_TRIM 1
|
||||
|
||||
struct p_connection_features {
|
||||
u32 protocol_min;
|
||||
u32 feature_flags;
|
||||
u32 protocol_max;
|
||||
|
||||
/* should be more than enough for future enhancements
|
||||
* for now, feature_flags and the reserved array shall be zero.
|
||||
*/
|
||||
|
||||
u32 _pad;
|
||||
u64 reserved[7];
|
||||
} __packed;
|
||||
|
||||
struct p_barrier {
|
||||
u32 barrier; /* barrier number _handle_ only */
|
||||
u32 pad; /* to multiple of 8 Byte */
|
||||
} __packed;
|
||||
|
||||
struct p_barrier_ack {
|
||||
u32 barrier;
|
||||
u32 set_size;
|
||||
} __packed;
|
||||
|
||||
struct p_rs_param {
|
||||
u32 resync_rate;
|
||||
|
||||
/* Since protocol version 88 and higher. */
|
||||
char verify_alg[0];
|
||||
} __packed;
|
||||
|
||||
struct p_rs_param_89 {
|
||||
u32 resync_rate;
|
||||
/* protocol version 89: */
|
||||
char verify_alg[SHARED_SECRET_MAX];
|
||||
char csums_alg[SHARED_SECRET_MAX];
|
||||
} __packed;
|
||||
|
||||
struct p_rs_param_95 {
|
||||
u32 resync_rate;
|
||||
char verify_alg[SHARED_SECRET_MAX];
|
||||
char csums_alg[SHARED_SECRET_MAX];
|
||||
u32 c_plan_ahead;
|
||||
u32 c_delay_target;
|
||||
u32 c_fill_target;
|
||||
u32 c_max_rate;
|
||||
} __packed;
|
||||
|
||||
enum drbd_conn_flags {
|
||||
CF_DISCARD_MY_DATA = 1,
|
||||
CF_DRY_RUN = 2,
|
||||
};
|
||||
|
||||
struct p_protocol {
|
||||
u32 protocol;
|
||||
u32 after_sb_0p;
|
||||
u32 after_sb_1p;
|
||||
u32 after_sb_2p;
|
||||
u32 conn_flags;
|
||||
u32 two_primaries;
|
||||
|
||||
/* Since protocol version 87 and higher. */
|
||||
char integrity_alg[0];
|
||||
|
||||
} __packed;
|
||||
|
||||
struct p_uuids {
|
||||
u64 uuid[UI_EXTENDED_SIZE];
|
||||
} __packed;
|
||||
|
||||
struct p_rs_uuid {
|
||||
u64 uuid;
|
||||
} __packed;
|
||||
|
||||
struct p_sizes {
|
||||
u64 d_size; /* size of disk */
|
||||
u64 u_size; /* user requested size */
|
||||
u64 c_size; /* current exported size */
|
||||
u32 max_bio_size; /* Maximal size of a BIO */
|
||||
u16 queue_order_type; /* not yet implemented in DRBD*/
|
||||
u16 dds_flags; /* use enum dds_flags here. */
|
||||
} __packed;
|
||||
|
||||
struct p_state {
|
||||
u32 state;
|
||||
} __packed;
|
||||
|
||||
struct p_req_state {
|
||||
u32 mask;
|
||||
u32 val;
|
||||
} __packed;
|
||||
|
||||
struct p_req_state_reply {
|
||||
u32 retcode;
|
||||
} __packed;
|
||||
|
||||
struct p_drbd06_param {
|
||||
u64 size;
|
||||
u32 state;
|
||||
u32 blksize;
|
||||
u32 protocol;
|
||||
u32 version;
|
||||
u32 gen_cnt[5];
|
||||
u32 bit_map_gen[5];
|
||||
} __packed;
|
||||
|
||||
struct p_block_desc {
|
||||
u64 sector;
|
||||
u32 blksize;
|
||||
u32 pad; /* to multiple of 8 Byte */
|
||||
} __packed;
|
||||
|
||||
/* Valid values for the encoding field.
|
||||
* Bump proto version when changing this. */
|
||||
enum drbd_bitmap_code {
|
||||
/* RLE_VLI_Bytes = 0,
|
||||
* and other bit variants had been defined during
|
||||
* algorithm evaluation. */
|
||||
RLE_VLI_Bits = 2,
|
||||
};
|
||||
|
||||
struct p_compressed_bm {
|
||||
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
|
||||
* (encoding & 0x80): polarity (set/unset) of first runlength
|
||||
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
|
||||
* used to pad up to head.length bytes
|
||||
*/
|
||||
u8 encoding;
|
||||
|
||||
u8 code[0];
|
||||
} __packed;
|
||||
|
||||
struct p_delay_probe93 {
|
||||
u32 seq_num; /* sequence number to match the two probe packets */
|
||||
u32 offset; /* usecs the probe got sent after the reference time point */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Bitmap packets need to fit within a single page on the sender and receiver,
|
||||
* so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
|
||||
*/
|
||||
#define DRBD_SOCKET_BUFFER_SIZE 4096
|
||||
|
||||
#endif /* __DRBD_PROTOCOL_H */
|
5661
drivers/block/drbd/drbd_receiver.c
Normal file
5661
drivers/block/drbd/drbd_receiver.c
Normal file
File diff suppressed because it is too large
Load diff
1651
drivers/block/drbd/drbd_req.c
Normal file
1651
drivers/block/drbd/drbd_req.c
Normal file
File diff suppressed because it is too large
Load diff
351
drivers/block/drbd/drbd_req.h
Normal file
351
drivers/block/drbd/drbd_req.h
Normal file
|
@ -0,0 +1,351 @@
|
|||
/*
|
||||
drbd_req.h
|
||||
|
||||
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
|
||||
|
||||
Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
|
||||
Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
|
||||
Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
|
||||
|
||||
DRBD is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
DRBD is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with drbd; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#ifndef _DRBD_REQ_H
|
||||
#define _DRBD_REQ_H
|
||||
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/drbd.h>
|
||||
#include "drbd_int.h"
|
||||
|
||||
/* The request callbacks will be called in irq context by the IDE drivers,
|
||||
and in Softirqs/Tasklets/BH context by the SCSI drivers,
|
||||
and by the receiver and worker in kernel-thread context.
|
||||
Try to get the locking right :) */
|
||||
|
||||
/*
|
||||
* Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are
|
||||
* associated with IO requests originating from the block layer above us.
|
||||
*
|
||||
* There are quite a few things that may happen to a drbd request
|
||||
* during its lifetime.
|
||||
*
|
||||
* It will be created.
|
||||
* It will be marked with the intention to be
|
||||
* submitted to local disk and/or
|
||||
* send via the network.
|
||||
*
|
||||
* It has to be placed on the transfer log and other housekeeping lists,
|
||||
* In case we have a network connection.
|
||||
*
|
||||
* It may be identified as a concurrent (write) request
|
||||
* and be handled accordingly.
|
||||
*
|
||||
* It may me handed over to the local disk subsystem.
|
||||
* It may be completed by the local disk subsystem,
|
||||
* either successfully or with io-error.
|
||||
* In case it is a READ request, and it failed locally,
|
||||
* it may be retried remotely.
|
||||
*
|
||||
* It may be queued for sending.
|
||||
* It may be handed over to the network stack,
|
||||
* which may fail.
|
||||
* It may be acknowledged by the "peer" according to the wire_protocol in use.
|
||||
* this may be a negative ack.
|
||||
* It may receive a faked ack when the network connection is lost and the
|
||||
* transfer log is cleaned up.
|
||||
* Sending may be canceled due to network connection loss.
|
||||
* When it finally has outlived its time,
|
||||
* corresponding dirty bits in the resync-bitmap may be cleared or set,
|
||||
* it will be destroyed,
|
||||
* and completion will be signalled to the originator,
|
||||
* with or without "success".
|
||||
*/
|
||||
|
||||
enum drbd_req_event {
|
||||
CREATED,
|
||||
TO_BE_SENT,
|
||||
TO_BE_SUBMITTED,
|
||||
|
||||
/* XXX yes, now I am inconsistent...
|
||||
* these are not "events" but "actions"
|
||||
* oh, well... */
|
||||
QUEUE_FOR_NET_WRITE,
|
||||
QUEUE_FOR_NET_READ,
|
||||
QUEUE_FOR_SEND_OOS,
|
||||
|
||||
/* An empty flush is queued as P_BARRIER,
|
||||
* which will cause it to complete "successfully",
|
||||
* even if the local disk flush failed.
|
||||
*
|
||||
* Just like "real" requests, empty flushes (blkdev_issue_flush()) will
|
||||
* only see an error if neither local nor remote data is reachable. */
|
||||
QUEUE_AS_DRBD_BARRIER,
|
||||
|
||||
SEND_CANCELED,
|
||||
SEND_FAILED,
|
||||
HANDED_OVER_TO_NETWORK,
|
||||
OOS_HANDED_TO_NETWORK,
|
||||
CONNECTION_LOST_WHILE_PENDING,
|
||||
READ_RETRY_REMOTE_CANCELED,
|
||||
RECV_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
|
||||
CONFLICT_RESOLVED,
|
||||
POSTPONE_WRITE,
|
||||
NEG_ACKED,
|
||||
BARRIER_ACKED, /* in protocol A and B */
|
||||
DATA_RECEIVED, /* (remote read) */
|
||||
|
||||
COMPLETED_OK,
|
||||
READ_COMPLETED_WITH_ERROR,
|
||||
READ_AHEAD_COMPLETED_WITH_ERROR,
|
||||
WRITE_COMPLETED_WITH_ERROR,
|
||||
DISCARD_COMPLETED_NOTSUPP,
|
||||
DISCARD_COMPLETED_WITH_ERROR,
|
||||
|
||||
ABORT_DISK_IO,
|
||||
RESEND,
|
||||
FAIL_FROZEN_DISK_IO,
|
||||
RESTART_FROZEN_DISK_IO,
|
||||
NOTHING,
|
||||
};
|
||||
|
||||
/* encoding of request states for now. we don't actually need that many bits.
|
||||
* we don't need to do atomic bit operations either, since most of the time we
|
||||
* need to look at the connection state and/or manipulate some lists at the
|
||||
* same time, so we should hold the request lock anyways.
|
||||
*/
|
||||
enum drbd_req_state_bits {
|
||||
/* 3210
|
||||
* 0000: no local possible
|
||||
* 0001: to be submitted
|
||||
* UNUSED, we could map: 011: submitted, completion still pending
|
||||
* 0110: completed ok
|
||||
* 0010: completed with error
|
||||
* 1001: Aborted (before completion)
|
||||
* 1x10: Aborted and completed -> free
|
||||
*/
|
||||
__RQ_LOCAL_PENDING,
|
||||
__RQ_LOCAL_COMPLETED,
|
||||
__RQ_LOCAL_OK,
|
||||
__RQ_LOCAL_ABORTED,
|
||||
|
||||
/* 87654
|
||||
* 00000: no network possible
|
||||
* 00001: to be send
|
||||
* 00011: to be send, on worker queue
|
||||
* 00101: sent, expecting recv_ack (B) or write_ack (C)
|
||||
* 11101: sent,
|
||||
* recv_ack (B) or implicit "ack" (A),
|
||||
* still waiting for the barrier ack.
|
||||
* master_bio may already be completed and invalidated.
|
||||
* 11100: write acked (C),
|
||||
* data received (for remote read, any protocol)
|
||||
* or finally the barrier ack has arrived (B,A)...
|
||||
* request can be freed
|
||||
* 01100: neg-acked (write, protocol C)
|
||||
* or neg-d-acked (read, any protocol)
|
||||
* or killed from the transfer log
|
||||
* during cleanup after connection loss
|
||||
* request can be freed
|
||||
* 01000: canceled or send failed...
|
||||
* request can be freed
|
||||
*/
|
||||
|
||||
/* if "SENT" is not set, yet, this can still fail or be canceled.
|
||||
* if "SENT" is set already, we still wait for an Ack packet.
|
||||
* when cleared, the master_bio may be completed.
|
||||
* in (B,A) the request object may still linger on the transaction log
|
||||
* until the corresponding barrier ack comes in */
|
||||
__RQ_NET_PENDING,
|
||||
|
||||
/* If it is QUEUED, and it is a WRITE, it is also registered in the
|
||||
* transfer log. Currently we need this flag to avoid conflicts between
|
||||
* worker canceling the request and tl_clear_barrier killing it from
|
||||
* transfer log. We should restructure the code so this conflict does
|
||||
* no longer occur. */
|
||||
__RQ_NET_QUEUED,
|
||||
|
||||
/* well, actually only "handed over to the network stack".
|
||||
*
|
||||
* TODO can potentially be dropped because of the similar meaning
|
||||
* of RQ_NET_SENT and ~RQ_NET_QUEUED.
|
||||
* however it is not exactly the same. before we drop it
|
||||
* we must ensure that we can tell a request with network part
|
||||
* from a request without, regardless of what happens to it. */
|
||||
__RQ_NET_SENT,
|
||||
|
||||
/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
|
||||
* basically this means the corresponding P_BARRIER_ACK was received */
|
||||
__RQ_NET_DONE,
|
||||
|
||||
/* whether or not we know (C) or pretend (B,A) that the write
|
||||
* was successfully written on the peer.
|
||||
*/
|
||||
__RQ_NET_OK,
|
||||
|
||||
/* peer called drbd_set_in_sync() for this write */
|
||||
__RQ_NET_SIS,
|
||||
|
||||
/* keep this last, its for the RQ_NET_MASK */
|
||||
__RQ_NET_MAX,
|
||||
|
||||
/* Set when this is a write, clear for a read */
|
||||
__RQ_WRITE,
|
||||
|
||||
/* Should call drbd_al_complete_io() for this request... */
|
||||
__RQ_IN_ACT_LOG,
|
||||
|
||||
/* The peer has sent a retry ACK */
|
||||
__RQ_POSTPONED,
|
||||
|
||||
/* would have been completed,
|
||||
* but was not, because of drbd_suspended() */
|
||||
__RQ_COMPLETION_SUSP,
|
||||
|
||||
/* We expect a receive ACK (wire proto B) */
|
||||
__RQ_EXP_RECEIVE_ACK,
|
||||
|
||||
/* We expect a write ACK (wite proto C) */
|
||||
__RQ_EXP_WRITE_ACK,
|
||||
|
||||
/* waiting for a barrier ack, did an extra kref_get */
|
||||
__RQ_EXP_BARR_ACK,
|
||||
};
|
||||
|
||||
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
|
||||
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
|
||||
#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK)
|
||||
#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED)
|
||||
|
||||
#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1)
|
||||
|
||||
#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING)
|
||||
#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED)
|
||||
#define RQ_NET_SENT (1UL << __RQ_NET_SENT)
|
||||
#define RQ_NET_DONE (1UL << __RQ_NET_DONE)
|
||||
#define RQ_NET_OK (1UL << __RQ_NET_OK)
|
||||
#define RQ_NET_SIS (1UL << __RQ_NET_SIS)
|
||||
|
||||
/* 0x1f8 */
|
||||
#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
|
||||
|
||||
#define RQ_WRITE (1UL << __RQ_WRITE)
|
||||
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
|
||||
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
|
||||
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
|
||||
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
|
||||
#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK)
|
||||
#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK)
|
||||
|
||||
/* For waking up the frozen transfer log mod_req() has to return if the request
|
||||
should be counted in the epoch object*/
|
||||
#define MR_WRITE 1
|
||||
#define MR_READ 2
|
||||
|
||||
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
|
||||
{
|
||||
struct bio *bio;
|
||||
bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
|
||||
|
||||
req->private_bio = bio;
|
||||
|
||||
bio->bi_private = req;
|
||||
bio->bi_end_io = drbd_request_endio;
|
||||
bio->bi_next = NULL;
|
||||
}
|
||||
|
||||
/* Short lived temporary struct on the stack.
|
||||
* We could squirrel the error to be returned into
|
||||
* bio->bi_iter.bi_size, or similar. But that would be too ugly. */
|
||||
struct bio_and_error {
|
||||
struct bio *bio;
|
||||
int error;
|
||||
};
|
||||
|
||||
extern void start_new_tl_epoch(struct drbd_connection *connection);
|
||||
extern void drbd_req_destroy(struct kref *kref);
|
||||
extern void _req_may_be_done(struct drbd_request *req,
|
||||
struct bio_and_error *m);
|
||||
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
struct bio_and_error *m);
|
||||
extern void complete_master_bio(struct drbd_device *device,
|
||||
struct bio_and_error *m);
|
||||
extern void request_timer_fn(unsigned long data);
|
||||
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
|
||||
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
|
||||
extern void tl_abort_disk_io(struct drbd_device *device);
|
||||
|
||||
/* this is in drbd_main.c */
|
||||
extern void drbd_restart_request(struct drbd_request *req);
|
||||
|
||||
/* use this if you don't want to deal with calling complete_master_bio()
|
||||
* outside the spinlock, e.g. when walking some list on cleanup. */
|
||||
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
||||
{
|
||||
struct drbd_device *device = req->device;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
/* __req_mod possibly frees req, do not touch req after that! */
|
||||
rv = __req_mod(req, what, &m);
|
||||
if (m.bio)
|
||||
complete_master_bio(device, &m);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* completion of master bio is outside of our spinlock.
|
||||
* We still may or may not be inside some irqs disabled section
|
||||
* of the lower level driver completion callback, so we need to
|
||||
* spin_lock_irqsave here. */
|
||||
static inline int req_mod(struct drbd_request *req,
|
||||
enum drbd_req_event what)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct drbd_device *device = req->device;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
spin_lock_irqsave(&device->resource->req_lock, flags);
|
||||
rv = __req_mod(req, what, &m);
|
||||
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
||||
|
||||
if (m.bio)
|
||||
complete_master_bio(device, &m);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static inline bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
s.conn >= C_WF_BITMAP_T &&
|
||||
s.conn < C_AHEAD);
|
||||
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
|
||||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
since we enter state C_AHEAD only if proto >= 96 */
|
||||
}
|
||||
|
||||
#endif
|
1892
drivers/block/drbd/drbd_state.c
Normal file
1892
drivers/block/drbd/drbd_state.c
Normal file
File diff suppressed because it is too large
Load diff
161
drivers/block/drbd/drbd_state.h
Normal file
161
drivers/block/drbd/drbd_state.h
Normal file
|
@ -0,0 +1,161 @@
|
|||
#ifndef DRBD_STATE_H
|
||||
#define DRBD_STATE_H
|
||||
|
||||
struct drbd_device;
|
||||
struct drbd_connection;
|
||||
|
||||
/**
|
||||
* DOC: DRBD State macros
|
||||
*
|
||||
* These macros are used to express state changes in easily readable form.
|
||||
*
|
||||
* The NS macros expand to a mask and a value, that can be bit ored onto the
|
||||
* current state as soon as the spinlock (req_lock) was taken.
|
||||
*
|
||||
* The _NS macros are used for state functions that get called with the
|
||||
* spinlock. These macros expand directly to the new state value.
|
||||
*
|
||||
* Besides the basic forms NS() and _NS() additional _?NS[23] are defined
|
||||
* to express state changes that affect more than one aspect of the state.
|
||||
*
|
||||
* E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
|
||||
* Means that the network connection was established and that the peer
|
||||
* is in secondary role.
|
||||
*/
|
||||
#define role_MASK R_MASK
|
||||
#define peer_MASK R_MASK
|
||||
#define disk_MASK D_MASK
|
||||
#define pdsk_MASK D_MASK
|
||||
#define conn_MASK C_MASK
|
||||
#define susp_MASK 1
|
||||
#define user_isp_MASK 1
|
||||
#define aftr_isp_MASK 1
|
||||
#define susp_nod_MASK 1
|
||||
#define susp_fen_MASK 1
|
||||
|
||||
#define NS(T, S) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T = (S); val; })
|
||||
#define NS2(T1, S1, T2, S2) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val; })
|
||||
#define NS3(T1, S1, T2, S2, T3, S3) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val.T3 = (S3); val; })
|
||||
|
||||
#define _NS(D, T, S) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
|
||||
#define _NS2(D, T1, S1, T2, S2) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns; })
|
||||
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
|
||||
|
||||
enum chg_state_flags {
|
||||
CS_HARD = 1 << 0,
|
||||
CS_VERBOSE = 1 << 1,
|
||||
CS_WAIT_COMPLETE = 1 << 2,
|
||||
CS_SERIALIZE = 1 << 3,
|
||||
CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
|
||||
CS_LOCAL_ONLY = 1 << 4, /* Do not consider a device pair wide state change */
|
||||
CS_DC_ROLE = 1 << 5, /* DC = display as connection state change */
|
||||
CS_DC_PEER = 1 << 6,
|
||||
CS_DC_CONN = 1 << 7,
|
||||
CS_DC_DISK = 1 << 8,
|
||||
CS_DC_PDSK = 1 << 9,
|
||||
CS_DC_SUSP = 1 << 10,
|
||||
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
|
||||
CS_IGN_OUTD_FAIL = 1 << 11,
|
||||
};
|
||||
|
||||
/* drbd_dev_state and drbd_state are different types. This is to stress the
|
||||
small difference. There is no suspended flag (.susp), and no suspended
|
||||
while fence handler runs flas (susp_fen). */
|
||||
union drbd_dev_state {
|
||||
struct {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned conn:5 ; /* 17/32 cstates */
|
||||
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned _unused:1 ;
|
||||
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
|
||||
unsigned peer_isp:1 ;
|
||||
unsigned user_isp:1 ;
|
||||
unsigned _pad:11; /* 0 unused */
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
unsigned _pad:11;
|
||||
unsigned user_isp:1 ;
|
||||
unsigned peer_isp:1 ;
|
||||
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
|
||||
unsigned _unused:1 ;
|
||||
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned conn:5 ; /* 17/32 cstates */
|
||||
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
|
||||
#else
|
||||
# error "this endianess is not supported"
|
||||
#endif
|
||||
};
|
||||
unsigned int i;
|
||||
};
|
||||
|
||||
extern enum drbd_state_rv drbd_change_state(struct drbd_device *device,
|
||||
enum chg_state_flags f,
|
||||
union drbd_state mask,
|
||||
union drbd_state val);
|
||||
extern void drbd_force_state(struct drbd_device *, union drbd_state,
|
||||
union drbd_state);
|
||||
extern enum drbd_state_rv _drbd_request_state(struct drbd_device *,
|
||||
union drbd_state,
|
||||
union drbd_state,
|
||||
enum chg_state_flags);
|
||||
extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern void print_st_err(struct drbd_device *, union drbd_state,
|
||||
union drbd_state, int);
|
||||
|
||||
enum drbd_state_rv
|
||||
_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
enum drbd_state_rv
|
||||
conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
extern void drbd_resume_al(struct drbd_device *device);
|
||||
extern bool conn_all_vols_unconf(struct drbd_connection *connection);
|
||||
|
||||
/**
|
||||
* drbd_request_state() - Reqest a state change
|
||||
* @device: DRBD device.
|
||||
* @mask: mask of state bits to change.
|
||||
* @val: value of new state bits.
|
||||
*
|
||||
* This is the most graceful way of requesting a state change. It is verbose
|
||||
* quite verbose in case the state change is not possible, and all those
|
||||
* state changes are globally serialized.
|
||||
*/
|
||||
static inline int drbd_request_state(struct drbd_device *device,
|
||||
union drbd_state mask,
|
||||
union drbd_state val)
|
||||
{
|
||||
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
|
||||
}
|
||||
|
||||
enum drbd_role conn_highest_role(struct drbd_connection *connection);
|
||||
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
|
||||
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
|
||||
enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection);
|
||||
enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection);
|
||||
enum drbd_conns conn_lowest_conn(struct drbd_connection *connection);
|
||||
|
||||
#endif
|
118
drivers/block/drbd/drbd_strings.c
Normal file
118
drivers/block/drbd/drbd_strings.c
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
drbd.h
|
||||
|
||||
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
|
||||
|
||||
Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
|
||||
Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
|
||||
Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
|
||||
|
||||
drbd is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
drbd is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with drbd; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
*/
|
||||
|
||||
#include <linux/drbd.h>
|
||||
#include "drbd_strings.h"
|
||||
|
||||
/* Human readable names for enum drbd_conns, indexed by connection state.
 * Covers C_STANDALONE .. C_BEHIND; drbd_conn_str() below bounds-checks
 * against C_BEHIND before indexing this table. */
static const char *drbd_conn_s_names[] = {
	[C_STANDALONE]     = "StandAlone",
	[C_DISCONNECTING]  = "Disconnecting",
	[C_UNCONNECTED]    = "Unconnected",
	[C_TIMEOUT]        = "Timeout",
	[C_BROKEN_PIPE]    = "BrokenPipe",
	[C_NETWORK_FAILURE] = "NetworkFailure",
	[C_PROTOCOL_ERROR] = "ProtocolError",
	[C_WF_CONNECTION]  = "WFConnection",
	[C_WF_REPORT_PARAMS] = "WFReportParams",
	[C_TEAR_DOWN]      = "TearDown",
	[C_CONNECTED]      = "Connected",
	[C_STARTING_SYNC_S] = "StartingSyncS",
	[C_STARTING_SYNC_T] = "StartingSyncT",
	[C_WF_BITMAP_S]    = "WFBitMapS",
	[C_WF_BITMAP_T]    = "WFBitMapT",
	[C_WF_SYNC_UUID]   = "WFSyncUUID",
	[C_SYNC_SOURCE]    = "SyncSource",
	[C_SYNC_TARGET]    = "SyncTarget",
	[C_PAUSED_SYNC_S]  = "PausedSyncS",
	[C_PAUSED_SYNC_T]  = "PausedSyncT",
	[C_VERIFY_S]       = "VerifyS",
	[C_VERIFY_T]       = "VerifyT",
	[C_AHEAD]          = "Ahead",
	[C_BEHIND]         = "Behind",
};
|
||||
|
||||
/* Human readable names for enum drbd_role; drbd_role_str() bounds-checks
 * against R_SECONDARY before indexing. */
static const char *drbd_role_s_names[] = {
	[R_PRIMARY]   = "Primary",
	[R_SECONDARY] = "Secondary",
	[R_UNKNOWN]   = "Unknown"
};
|
||||
|
||||
/* Human readable names for enum drbd_disk_state; drbd_disk_str()
 * bounds-checks against D_UP_TO_DATE before indexing. */
static const char *drbd_disk_s_names[] = {
	[D_DISKLESS]     = "Diskless",
	[D_ATTACHING]    = "Attaching",
	[D_FAILED]       = "Failed",
	[D_NEGOTIATING]  = "Negotiating",
	[D_INCONSISTENT] = "Inconsistent",
	[D_OUTDATED]     = "Outdated",
	[D_UNKNOWN]      = "DUnknown",
	[D_CONSISTENT]   = "Consistent",
	[D_UP_TO_DATE]   = "UpToDate",
};
|
||||
|
||||
/* Human readable descriptions of the (negative) drbd_state_rv error codes.
 * Indexed by the *negated* error value; drbd_set_st_err_str() performs the
 * negation and the range checks before looking up here. */
static const char *drbd_state_sw_errors[] = {
	[-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
	[-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data",
	[-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
	[-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
	[-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
	[-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
	[-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
	[-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
	[-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
	[-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
	[-SS_DEVICE_IN_USE] = "Device is held open by someone",
	[-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
	[-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify",
	[-SS_NEED_CONNECTION] = "Need a connection to start verify or resync",
	[-SS_NOT_SUPPORTED] = "Peer does not support protocol",
	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
	[-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful disconnect/outdate peer",
	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
};
|
||||
|
||||
const char *drbd_conn_str(enum drbd_conns s)
|
||||
{
|
||||
/* enums are unsigned... */
|
||||
return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s];
|
||||
}
|
||||
|
||||
const char *drbd_role_str(enum drbd_role s)
|
||||
{
|
||||
return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s];
|
||||
}
|
||||
|
||||
const char *drbd_disk_str(enum drbd_disk_state s)
|
||||
{
|
||||
return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s];
|
||||
}
|
||||
|
||||
const char *drbd_set_st_err_str(enum drbd_state_rv err)
|
||||
{
|
||||
return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
|
||||
err > SS_TWO_PRIMARIES ? "TOO_LARGE"
|
||||
: drbd_state_sw_errors[-err];
|
||||
}
|
9
drivers/block/drbd/drbd_strings.h
Normal file
9
drivers/block/drbd/drbd_strings.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
#ifndef __DRBD_STRINGS_H
#define __DRBD_STRINGS_H

/* String lookup helpers implemented in drbd_strings.c: map the DRBD state
 * enums to the human readable names used on the wire and in logs.
 * Out-of-range inputs return "TOO_LARGE"/"TOO_SMALL" sentinels rather
 * than indexing out of bounds. */
extern const char *drbd_conn_str(enum drbd_conns);
extern const char *drbd_role_str(enum drbd_role);
extern const char *drbd_disk_str(enum drbd_disk_state);
extern const char *drbd_set_st_err_str(enum drbd_state_rv);

#endif  /* __DRBD_STRINGS_H */
|
351
drivers/block/drbd/drbd_vli.h
Normal file
351
drivers/block/drbd/drbd_vli.h
Normal file
|
@ -0,0 +1,351 @@
|
|||
/*
|
||||
-*- linux-c -*-
|
||||
drbd_receiver.c
|
||||
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
|
||||
|
||||
Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
|
||||
Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
|
||||
Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
|
||||
|
||||
drbd is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
drbd is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with drbd; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#ifndef _DRBD_VLI_H
|
||||
#define _DRBD_VLI_H
|
||||
|
||||
/*
|
||||
* At a granularity of 4KiB storage represented per bit,
|
||||
 * and storage sizes of several TiB,
|
||||
* and possibly small-bandwidth replication,
|
||||
* the bitmap transfer time can take much too long,
|
||||
* if transmitted in plain text.
|
||||
*
|
||||
* We try to reduce the transferred bitmap information
|
||||
* by encoding runlengths of bit polarity.
|
||||
*
|
||||
* We never actually need to encode a "zero" (runlengths are positive).
|
||||
* But then we have to store the value of the first bit.
|
||||
* The first bit of information thus shall encode if the first runlength
|
||||
* gives the number of set or unset bits.
|
||||
*
|
||||
* We assume that large areas are either completely set or unset,
|
||||
* which gives good compression with any runlength method,
|
||||
* even when encoding the runlength as fixed size 32bit/64bit integers.
|
||||
*
|
||||
* Still, there may be areas where the polarity flips every few bits,
|
||||
* and encoding the runlength sequence of those areas with fix size
|
||||
* integers would be much worse than plaintext.
|
||||
*
|
||||
* We want to encode small runlength values with minimum code length,
|
||||
* while still being able to encode a Huge run of all zeros.
|
||||
*
|
||||
* Thus we need a Variable Length Integer encoding, VLI.
|
||||
*
|
||||
* For some cases, we produce more code bits than plaintext input.
|
||||
* We need to send incompressible chunks as plaintext, skip over them
|
||||
* and then see if the next chunk compresses better.
|
||||
*
|
||||
* We don't care too much about "excellent" compression ratio for large
|
||||
* runlengths (all set/all clear): whether we achieve a factor of 100
|
||||
* or 1000 is not that much of an issue.
|
||||
* We do not want to waste too much on short runlengths in the "noisy"
|
||||
* parts of the bitmap, though.
|
||||
*
|
||||
* There are endless variants of VLI, we experimented with:
|
||||
* * simple byte-based
|
||||
* * various bit based with different code word length.
|
||||
*
|
||||
 * To avoid yet another configuration parameter (choice of bitmap compression
|
||||
* algorithm) which was difficult to explain and tune, we just chose the one
|
||||
* variant that turned out best in all test cases.
|
||||
* Based on real world usage patterns, with device sizes ranging from a few GiB
|
||||
* to several TiB, file server/mailserver/webserver/mysql/postgress,
|
||||
* mostly idle to really busy, the all time winner (though sometimes only
|
||||
* marginally better) is:
|
||||
*/
|
||||
|
||||
/*
|
||||
* encoding is "visualised" as
|
||||
* __little endian__ bitstream, least significant bit first (left most)
|
||||
*
|
||||
* this particular encoding is chosen so that the prefix code
|
||||
* starts as unary encoding the level, then modified so that
|
||||
* 10 levels can be described in 8bit, with minimal overhead
|
||||
* for the smaller levels.
|
||||
*
|
||||
* Number of data bits follow fibonacci sequence, with the exception of the
|
||||
* last level (+1 data bit, so it makes 64bit total). The only worse code when
|
||||
* encoding bit polarity runlength is 1 plain bits => 2 code bits.
|
||||
prefix data bits max val Nº data bits
|
||||
0 x 0x2 1
|
||||
10 x 0x4 1
|
||||
110 xx 0x8 2
|
||||
1110 xxx 0x10 3
|
||||
11110 xxx xx 0x30 5
|
||||
111110 xx xxxxxx 0x130 8
|
||||
11111100 xxxxxxxx xxxxx 0x2130 13
|
||||
11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21
|
||||
11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34
|
||||
11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
|
||||
* maximum encodable value: 0x100000400202130 == 2**56 + some */
|
||||
|
||||
/* compression "table":
|
||||
transmitted x 0.29
|
||||
as plaintext x ........................
|
||||
x ........................
|
||||
x ........................
|
||||
x 0.59 0.21........................
|
||||
x ........................................................
|
||||
x .. c ...................................................
|
||||
x 0.44.. o ...................................................
|
||||
x .......... d ...................................................
|
||||
x .......... e ...................................................
|
||||
X............. ...................................................
|
||||
x.............. b ...................................................
|
||||
2.0x............... i ...................................................
|
||||
#X................ t ...................................................
|
||||
#................. s ........................... plain bits ..........
|
||||
-+-----------------------------------------------------------------------
|
||||
1 16 32 64
|
||||
*/
|
||||
|
||||
/* LEVEL: (total bits, prefix bits, prefix value),
 * sorted ascending by number of total bits.
 * The rest of the code table is calculated at compiletime from this. */

/* fibonacci data 1, 1, ... */
/* Each LEVEL(t, b, v) invocation describes one code word length:
 *   t = total bits of the code word (prefix + data),
 *   b = number of prefix bits,
 *   v = prefix bit pattern (little endian, least significant bits).
 * The number of data bits is t - b; per the table in the header comment
 * the data widths follow the fibonacci sequence, except for the last
 * level, which is padded to 64 bits total.  Callers #define LEVEL before
 * expanding this macro and #undef it afterwards. */
#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00); \
	LEVEL( 3, 2, 0x01); \
	LEVEL( 5, 3, 0x03); \
	LEVEL( 7, 4, 0x07); \
	LEVEL(10, 5, 0x0f); \
	LEVEL(14, 6, 0x1f); \
	LEVEL(21, 8, 0x3f); \
	LEVEL(29, 8, 0x7f); \
	LEVEL(42, 8, 0xbf); \
	LEVEL(64, 8, 0xff); \
	} while (0)
|
||||
|
||||
/* finds a suitable level to decode the least significant part of in.
 * returns number of bits consumed.
 *
 * BUG() for bad input, as that would mean a buggy code table. */
static inline int vli_decode_bits(u64 *out, const u64 in)
{
	/* Running base value of the current level: the sum of the value
	 * ranges of all shorter levels, so each level decodes a disjoint,
	 * contiguous range of plain values starting at adj. */
	u64 adj = 1;

/* LEVEL(total bits, prefix bits, prefix value):
 * if the low b bits of in match the prefix v, extract the t-b data bits,
 * add the level base, and return the number of code bits consumed;
 * otherwise advance the base past this level's value range. */
#define LEVEL(t,b,v)					\
	do {						\
		if ((in & ((1 << b) -1)) == v) {	\
			*out = ((in & ((~0ULL) >> (64-t))) >> b) + adj;	\
			return t;			\
		}					\
		adj += 1ULL << (t - b);			\
	} while (0)

	VLI_L_1_1();

	/* NOT REACHED, if VLI_LEVELS code table is defined properly */
	BUG();
#undef LEVEL
}
|
||||
|
||||
/* return number of code bits needed,
 * or negative error number */
static inline int __vli_encode_bits(u64 *out, const u64 in)
{
	u64 max = 0;	/* highest value encodable up to and incl. current level */
	u64 adj = 1;	/* lowest value encoded by the current level */

	/* zero cannot be encoded: runlengths are strictly positive */
	if (in == 0)
		return -EINVAL;

/* LEVEL(total bits, prefix bits, prefix value):
 * grow max by this level's value range (2^(t-b) values); if in now fits,
 * emit (in - adj) as the data bits above the prefix v (if the caller
 * wants the code word at all) and return the code length t. */
#define LEVEL(t,b,v) do {				\
		max += 1ULL << (t - b);			\
		if (in <= max) {			\
			if (out)			\
				*out = ((in - adj) << b) | v;	\
			return t;			\
		}					\
		adj = max + 1;				\
	} while (0)

	VLI_L_1_1();

	/* in exceeds the largest encodable value (see table: 2**56 + some) */
	return -EOVERFLOW;
#undef LEVEL
}
|
||||
|
||||
#undef VLI_L_1_1
|
||||
|
||||
/* code from here down is independent of the actually used bit code */
|
||||
|
||||
/*
|
||||
* Code length is determined by some unique (e.g. unary) prefix.
|
||||
* This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
|
||||
* not a byte stream.
|
||||
*/
|
||||
|
||||
/* for the bitstream, we need a cursor */
struct bitstream_cursor {
	/* the current byte */
	u8 *b;
	/* the current bit within *b, normalized: 0..7 */
	unsigned int bit;
};
|
||||
|
||||
/* initialize cursor to point to first bit of stream */
|
||||
static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
|
||||
{
|
||||
cur->b = s;
|
||||
cur->bit = 0;
|
||||
}
|
||||
|
||||
/* advance cursor by that many bits; maximum expected input value: 64,
|
||||
* but depending on VLI implementation, it may be more. */
|
||||
static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
|
||||
{
|
||||
bits += cur->bit;
|
||||
cur->b = cur->b + (bits >> 3);
|
||||
cur->bit = bits & 7;
|
||||
}
|
||||
|
||||
/* the bitstream itself knows its length */
struct bitstream {
	/* read/write position within buf */
	struct bitstream_cursor cur;
	/* backing byte buffer */
	unsigned char *buf;
	size_t buf_len;		/* in bytes */

	/* for input stream:
	 * number of trailing 0 bits for padding
	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
	unsigned int pad_bits;
};
|
||||
|
||||
static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
|
||||
{
|
||||
bs->buf = s;
|
||||
bs->buf_len = len;
|
||||
bs->pad_bits = pad_bits;
|
||||
bitstream_cursor_reset(&bs->cur, bs->buf);
|
||||
}
|
||||
|
||||
static inline void bitstream_rewind(struct bitstream *bs)
|
||||
{
|
||||
bitstream_cursor_reset(&bs->cur, bs->buf);
|
||||
memset(bs->buf, 0, bs->buf_len);
|
||||
}
|
||||
|
||||
/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
 * Ignores "pad_bits".
 * Returns zero if bits == 0 (nothing to do).
 * Returns number of bits used if successful.
 *
 * If there is not enough room left in bitstream,
 * leaves bitstream unchanged and returns -ENOBUFS.
 */
static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
{
	unsigned char *b = bs->cur.b;
	unsigned int tmp;

	if (bits == 0)
		return 0;

	/* Would the last byte we touch lie at or beyond the end of buf?
	 * NOTE(review): pointer difference (signed) compared against
	 * buf_len (size_t); relies on cursor staying within buf — confirm. */
	if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
		return -ENOBUFS;

	/* paranoia: strip off hi bits; they should not be set anyways. */
	if (bits < 64)
		val &= ~0ULL >> (64 - bits);

	/* first byte is shared with bits already in the stream: OR into place */
	*b++ |= (val & 0xff) << bs->cur.bit;

	/* remaining bytes, least significant byte of val first */
	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
		*b++ |= (val >> tmp) & 0xff;

	bitstream_cursor_advance(&bs->cur, bits);
	return bits;
}
|
||||
|
||||
/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
 *
 * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
 *
 * If there are less than the requested number of valid bits left in the
 * bitstream, still fetches all available bits.
 *
 * Returns number of actually fetched bits.
 */
static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
{
	u64 val;
	unsigned int n;

	if (bits > 64)
		return -EINVAL;

	/* clamp the request to the number of valid (non-padding) bits left */
	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
			- bs->cur.bit - bs->pad_bits;

	if (bits == 0) {
		*out = 0;
		return 0;
	}

	/* get the high bits */
	val = 0;
	n = (bs->cur.bit + bits + 7) >> 3;
	/* n may be at most 9, if cur.bit + bits > 64 */
	/* which means this copies at most 8 byte */
	if (n) {
		/* bytes after the first; shift them above the bits still
		 * to be taken from cur.b[0] (stream is little endian) */
		memcpy(&val, bs->cur.b+1, n - 1);
		val = le64_to_cpu(val) << (8 - bs->cur.bit);
	}

	/* we still need the low bits */
	val |= bs->cur.b[0] >> bs->cur.bit;

	/* and mask out bits we don't want */
	val &= ~0ULL >> (64 - bits);

	bitstream_cursor_advance(&bs->cur, bits);
	*out = val;

	return bits;
}
|
||||
|
||||
/* encodes @in as vli into @bs;
|
||||
|
||||
* return values
|
||||
* > 0: number of bits successfully stored in bitstream
|
||||
* -ENOBUFS @bs is full
|
||||
* -EINVAL input zero (invalid)
|
||||
* -EOVERFLOW input too large for this vli code (invalid)
|
||||
*/
|
||||
static inline int vli_encode_bits(struct bitstream *bs, u64 in)
|
||||
{
|
||||
u64 code = code;
|
||||
int bits = __vli_encode_bits(&code, in);
|
||||
|
||||
if (bits <= 0)
|
||||
return bits;
|
||||
|
||||
return bitstream_put_bits(bs, code, bits);
|
||||
}
|
||||
|
||||
#endif
|
2153
drivers/block/drbd/drbd_worker.c
Normal file
2153
drivers/block/drbd/drbd_worker.c
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue