Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

28
kernel/locking/Makefile Normal file
View file

@ -0,0 +1,28 @@
obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
endif
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o

89
kernel/locking/lglock.c Normal file
View file

@ -0,0 +1,89 @@
/* See include/linux/lglock.h for description */
#include <linux/module.h>
#include <linux/lglock.h>
#include <linux/cpu.h>
#include <linux/string.h>
/*
* Note there is no uninit, so lglocks cannot be defined in
* modules (but it's fine to use them from there)
* Could be added though, just undo lg_lock_init
*/
void lg_lock_init(struct lglock *lg, char *name)
{
LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
}
EXPORT_SYMBOL(lg_lock_init);
void lg_local_lock(struct lglock *lg)
{
arch_spinlock_t *lock;
preempt_disable();
lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
arch_spin_lock(lock);
}
EXPORT_SYMBOL(lg_local_lock);
void lg_local_unlock(struct lglock *lg)
{
arch_spinlock_t *lock;
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
arch_spin_unlock(lock);
preempt_enable();
}
EXPORT_SYMBOL(lg_local_unlock);
void lg_local_lock_cpu(struct lglock *lg, int cpu)
{
arch_spinlock_t *lock;
preempt_disable();
lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
arch_spin_lock(lock);
}
EXPORT_SYMBOL(lg_local_lock_cpu);
void lg_local_unlock_cpu(struct lglock *lg, int cpu)
{
arch_spinlock_t *lock;
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
arch_spin_unlock(lock);
preempt_enable();
}
EXPORT_SYMBOL(lg_local_unlock_cpu);
void lg_global_lock(struct lglock *lg)
{
int i;
preempt_disable();
lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
for_each_possible_cpu(i) {
arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
arch_spin_lock(lock);
}
}
EXPORT_SYMBOL(lg_global_lock);
void lg_global_unlock(struct lglock *lg)
{
int i;
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
for_each_possible_cpu(i) {
arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
arch_spin_unlock(lock);
}
preempt_enable();
}
EXPORT_SYMBOL(lg_global_unlock);

4260
kernel/locking/lockdep.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,170 @@
/*
* kernel/lockdep_internals.h
*
* Runtime locking correctness validator
*
* lockdep subsystem internal functions and variables.
*/
/*
* Lock-class usage-state bits:
*/
enum lock_usage_bit {
#define LOCKDEP_STATE(__STATE) \
LOCK_USED_IN_##__STATE, \
LOCK_USED_IN_##__STATE##_READ, \
LOCK_ENABLED_##__STATE, \
LOCK_ENABLED_##__STATE##_READ,
#include "lockdep_states.h"
#undef LOCKDEP_STATE
LOCK_USED,
LOCK_USAGE_STATES
};
/*
* Usage-state bitmasks:
*/
#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
enum {
#define LOCKDEP_STATE(__STATE) \
__LOCKF(USED_IN_##__STATE) \
__LOCKF(USED_IN_##__STATE##_READ) \
__LOCKF(ENABLED_##__STATE) \
__LOCKF(ENABLED_##__STATE##_READ)
#include "lockdep_states.h"
#undef LOCKDEP_STATE
__LOCKF(USED)
};
#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
#define LOCKF_ENABLED_IRQ_READ \
(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
#define LOCKF_USED_IN_IRQ_READ \
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
/*
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
* we track.
*
* We use the per-lock dependency maps in two ways: we grow it by adding
* every to-be-taken lock to all currently held lock's own dependency
* table (if it's not there yet), and we check it for lock order
* conflicts and deadlocks.
*/
#define MAX_LOCKDEP_ENTRIES 32768UL
#define MAX_LOCKDEP_CHAINS_BITS 16
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
/*
* Stack-trace: tightly packed array of stack backtrace
* addresses. Protected by the hash_lock.
*/
#define MAX_STACK_TRACE_ENTRIES 524288UL
extern struct list_head all_lock_classes;
extern struct lock_chain lock_chains[];
#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
extern void get_usage_chars(struct lock_class *class,
char usage[LOCK_USAGE_CHARS]);
extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
extern unsigned long nr_lock_classes;
extern unsigned long nr_list_entries;
extern unsigned long nr_lock_chains;
extern int nr_chain_hlocks;
extern unsigned long nr_stack_trace_entries;
extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int max_recursion_depth;
extern unsigned int max_bfs_queue_depth;
#ifdef CONFIG_PROVE_LOCKING
extern unsigned long lockdep_count_forward_deps(struct lock_class *);
extern unsigned long lockdep_count_backward_deps(struct lock_class *);
#else
static inline unsigned long
lockdep_count_forward_deps(struct lock_class *class)
{
return 0;
}
static inline unsigned long
lockdep_count_backward_deps(struct lock_class *class)
{
return 0;
}
#endif
#ifdef CONFIG_DEBUG_LOCKDEP
#include <asm/local.h>
/*
* Various lockdep statistics.
* We want them per cpu as they are often accessed in fast path
* and we want to avoid too much cache bouncing.
*/
struct lockdep_stats {
int chain_lookup_hits;
int chain_lookup_misses;
int hardirqs_on_events;
int hardirqs_off_events;
int redundant_hardirqs_on;
int redundant_hardirqs_off;
int softirqs_on_events;
int softirqs_off_events;
int redundant_softirqs_on;
int redundant_softirqs_off;
int nr_unused_locks;
int nr_cyclic_checks;
int nr_cyclic_check_recursions;
int nr_find_usage_forwards_checks;
int nr_find_usage_forwards_recursions;
int nr_find_usage_backwards_checks;
int nr_find_usage_backwards_recursions;
};
DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
#define __debug_atomic_inc(ptr) \
this_cpu_inc(lockdep_stats.ptr);
#define debug_atomic_inc(ptr) { \
WARN_ON_ONCE(!irqs_disabled()); \
__this_cpu_inc(lockdep_stats.ptr); \
}
#define debug_atomic_dec(ptr) { \
WARN_ON_ONCE(!irqs_disabled()); \
__this_cpu_dec(lockdep_stats.ptr); \
}
#define debug_atomic_read(ptr) ({ \
struct lockdep_stats *__cpu_lockdep_stats; \
unsigned long long __total = 0; \
int __cpu; \
for_each_possible_cpu(__cpu) { \
__cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
__total += __cpu_lockdep_stats->ptr; \
} \
__total; \
})
#else
# define __debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_dec(ptr) do { } while (0)
# define debug_atomic_read(ptr) 0
#endif

View file

@ -0,0 +1,683 @@
/*
* kernel/lockdep_proc.c
*
* Runtime locking correctness validator
*
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*
* Code for /proc/lockdep and /proc/lockdep_stats:
*
*/
#include <linux/export.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include "lockdep_internals.h"
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
return seq_list_next(v, &all_lock_classes, pos);
}
static void *l_start(struct seq_file *m, loff_t *pos)
{
return seq_list_start_head(&all_lock_classes, *pos);
}
static void l_stop(struct seq_file *m, void *v)
{
}
static void print_name(struct seq_file *m, struct lock_class *class)
{
char str[KSYM_NAME_LEN];
const char *name = class->name;
if (!name) {
name = __get_key_name(class->key, str);
seq_printf(m, "%s", name);
} else{
seq_printf(m, "%s", name);
if (class->name_version > 1)
seq_printf(m, "#%d", class->name_version);
if (class->subclass)
seq_printf(m, "/%d", class->subclass);
}
}
static int l_show(struct seq_file *m, void *v)
{
struct lock_class *class = list_entry(v, struct lock_class, lock_entry);
struct lock_list *entry;
char usage[LOCK_USAGE_CHARS];
if (v == &all_lock_classes) {
seq_printf(m, "all lock classes:\n");
return 0;
}
seq_printf(m, "%p", class->key);
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
#endif
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
#endif
get_usage_chars(class, usage);
seq_printf(m, " %s", usage);
seq_printf(m, ": ");
print_name(m, class);
seq_puts(m, "\n");
list_for_each_entry(entry, &class->locks_after, entry) {
if (entry->distance == 1) {
seq_printf(m, " -> [%p] ", entry->class->key);
print_name(m, entry->class);
seq_puts(m, "\n");
}
}
seq_puts(m, "\n");
return 0;
}
static const struct seq_operations lockdep_ops = {
.start = l_start,
.next = l_next,
.stop = l_stop,
.show = l_show,
};
static int lockdep_open(struct inode *inode, struct file *file)
{
return seq_open(file, &lockdep_ops);
}
static const struct file_operations proc_lockdep_operations = {
.open = lockdep_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#ifdef CONFIG_PROVE_LOCKING
static void *lc_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0)
return SEQ_START_TOKEN;
if (*pos - 1 < nr_lock_chains)
return lock_chains + (*pos - 1);
return NULL;
}
static void *lc_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return lc_start(m, pos);
}
static void lc_stop(struct seq_file *m, void *v)
{
}
static int lc_show(struct seq_file *m, void *v)
{
struct lock_chain *chain = v;
struct lock_class *class;
int i;
if (v == SEQ_START_TOKEN) {
seq_printf(m, "all lock chains:\n");
return 0;
}
seq_printf(m, "irq_context: %d\n", chain->irq_context);
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
if (!class->key)
continue;
seq_printf(m, "[%p] ", class->key);
print_name(m, class);
seq_puts(m, "\n");
}
seq_puts(m, "\n");
return 0;
}
static const struct seq_operations lockdep_chains_ops = {
.start = lc_start,
.next = lc_next,
.stop = lc_stop,
.show = lc_show,
};
static int lockdep_chains_open(struct inode *inode, struct file *file)
{
return seq_open(file, &lockdep_chains_ops);
}
static const struct file_operations proc_lockdep_chains_operations = {
.open = lockdep_chains_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* CONFIG_PROVE_LOCKING */
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
hi2 = debug_atomic_read(hardirqs_off_events),
hr1 = debug_atomic_read(redundant_hardirqs_on),
hr2 = debug_atomic_read(redundant_hardirqs_off),
si1 = debug_atomic_read(softirqs_on_events),
si2 = debug_atomic_read(softirqs_off_events),
sr1 = debug_atomic_read(redundant_softirqs_on),
sr2 = debug_atomic_read(redundant_softirqs_off);
seq_printf(m, " chain lookup misses: %11llu\n",
debug_atomic_read(chain_lookup_misses));
seq_printf(m, " chain lookup hits: %11llu\n",
debug_atomic_read(chain_lookup_hits));
seq_printf(m, " cyclic checks: %11llu\n",
debug_atomic_read(nr_cyclic_checks));
seq_printf(m, " find-mask forwards checks: %11llu\n",
debug_atomic_read(nr_find_usage_forwards_checks));
seq_printf(m, " find-mask backwards checks: %11llu\n",
debug_atomic_read(nr_find_usage_backwards_checks));
seq_printf(m, " hardirq on events: %11llu\n", hi1);
seq_printf(m, " hardirq off events: %11llu\n", hi2);
seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
seq_printf(m, " softirq on events: %11llu\n", si1);
seq_printf(m, " softirq off events: %11llu\n", si2);
seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
#endif
}
static int lockdep_stats_show(struct seq_file *m, void *v)
{
struct lock_class *class;
unsigned long nr_unused = 0, nr_uncategorized = 0,
nr_irq_safe = 0, nr_irq_unsafe = 0,
nr_softirq_safe = 0, nr_softirq_unsafe = 0,
nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
sum_forward_deps = 0;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
if (class->usage_mask == 0)
nr_unused++;
if (class->usage_mask == LOCKF_USED)
nr_uncategorized++;
if (class->usage_mask & LOCKF_USED_IN_IRQ)
nr_irq_safe++;
if (class->usage_mask & LOCKF_ENABLED_IRQ)
nr_irq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
nr_softirq_safe++;
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
nr_softirq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
nr_hardirq_safe++;
if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
nr_hardirq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
nr_irq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
nr_irq_read_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
nr_softirq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
nr_softirq_read_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
nr_hardirq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
nr_hardirq_read_unsafe++;
#ifdef CONFIG_PROVE_LOCKING
sum_forward_deps += lockdep_count_forward_deps(class);
#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
nr_list_entries, MAX_LOCKDEP_ENTRIES);
seq_printf(m, " indirect dependencies: %11lu\n",
sum_forward_deps);
/*
* Total number of dependencies:
*
* All irq-safe locks may nest inside irq-unsafe locks,
* plus all the other known dependencies:
*/
seq_printf(m, " all direct dependencies: %11lu\n",
nr_irq_unsafe * nr_irq_safe +
nr_hardirq_unsafe * nr_hardirq_safe +
nr_list_entries);
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
nr_lock_chains, MAX_LOCKDEP_CHAINS);
seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
seq_printf(m, " in-hardirq chains: %11u\n",
nr_hardirq_chains);
seq_printf(m, " in-softirq chains: %11u\n",
nr_softirq_chains);
#endif
seq_printf(m, " in-process chains: %11u\n",
nr_process_chains);
seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
seq_printf(m, " combined max dependencies: %11u\n",
(nr_hardirq_chains + 1) *
(nr_softirq_chains + 1) *
(nr_process_chains + 1)
);
seq_printf(m, " hardirq-safe locks: %11lu\n",
nr_hardirq_safe);
seq_printf(m, " hardirq-unsafe locks: %11lu\n",
nr_hardirq_unsafe);
seq_printf(m, " softirq-safe locks: %11lu\n",
nr_softirq_safe);
seq_printf(m, " softirq-unsafe locks: %11lu\n",
nr_softirq_unsafe);
seq_printf(m, " irq-safe locks: %11lu\n",
nr_irq_safe);
seq_printf(m, " irq-unsafe locks: %11lu\n",
nr_irq_unsafe);
seq_printf(m, " hardirq-read-safe locks: %11lu\n",
nr_hardirq_read_safe);
seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
nr_hardirq_read_unsafe);
seq_printf(m, " softirq-read-safe locks: %11lu\n",
nr_softirq_read_safe);
seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
nr_softirq_read_unsafe);
seq_printf(m, " irq-read-safe locks: %11lu\n",
nr_irq_read_safe);
seq_printf(m, " irq-read-unsafe locks: %11lu\n",
nr_irq_read_unsafe);
seq_printf(m, " uncategorized locks: %11lu\n",
nr_uncategorized);
seq_printf(m, " unused locks: %11lu\n",
nr_unused);
seq_printf(m, " max locking depth: %11u\n",
max_lockdep_depth);
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " max bfs queue depth: %11u\n",
max_bfs_queue_depth);
#endif
lockdep_stats_debug_show(m);
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
return 0;
}
static int lockdep_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, lockdep_stats_show, NULL);
}
static const struct file_operations proc_lockdep_stats_operations = {
.open = lockdep_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#ifdef CONFIG_LOCK_STAT
struct lock_stat_data {
struct lock_class *class;
struct lock_class_stats stats;
};
struct lock_stat_seq {
struct lock_stat_data *iter_end;
struct lock_stat_data stats[MAX_LOCKDEP_KEYS];
};
/*
* sort on absolute number of contentions
*/
static int lock_stat_cmp(const void *l, const void *r)
{
const struct lock_stat_data *dl = l, *dr = r;
unsigned long nl, nr;
nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr;
nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr;
return nr - nl;
}
static void seq_line(struct seq_file *m, char c, int offset, int length)
{
int i;
for (i = 0; i < offset; i++)
seq_puts(m, " ");
for (i = 0; i < length; i++)
seq_printf(m, "%c", c);
seq_puts(m, "\n");
}
static void snprint_time(char *buf, size_t bufsiz, s64 nr)
{
s64 div;
s32 rem;
nr += 5; /* for display rounding */
div = div_s64_rem(nr, 1000, &rem);
snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
}
static void seq_time(struct seq_file *m, s64 time)
{
char num[15];
snprint_time(num, sizeof(num), time);
seq_printf(m, " %14s", num);
}
static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
{
seq_printf(m, "%14lu", lt->nr);
seq_time(m, lt->min);
seq_time(m, lt->max);
seq_time(m, lt->total);
seq_time(m, lt->nr ? div_s64(lt->total, lt->nr) : 0);
}
static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
{
char name[39];
struct lock_class *class;
struct lock_class_stats *stats;
int i, namelen;
class = data->class;
stats = &data->stats;
namelen = 38;
if (class->name_version > 1)
namelen -= 2; /* XXX truncates versions > 9 */
if (class->subclass)
namelen -= 2;
if (!class->name) {
char str[KSYM_NAME_LEN];
const char *key_name;
key_name = __get_key_name(class->key, str);
snprintf(name, namelen, "%s", key_name);
} else {
snprintf(name, namelen, "%s", class->name);
}
namelen = strlen(name);
if (class->name_version > 1) {
snprintf(name+namelen, 3, "#%d", class->name_version);
namelen += 2;
}
if (class->subclass) {
snprintf(name+namelen, 3, "/%d", class->subclass);
namelen += 2;
}
if (stats->write_holdtime.nr) {
if (stats->read_holdtime.nr)
seq_printf(m, "%38s-W:", name);
else
seq_printf(m, "%40s:", name);
seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
seq_lock_time(m, &stats->write_waittime);
seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
seq_lock_time(m, &stats->write_holdtime);
seq_puts(m, "\n");
}
if (stats->read_holdtime.nr) {
seq_printf(m, "%38s-R:", name);
seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]);
seq_lock_time(m, &stats->read_waittime);
seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]);
seq_lock_time(m, &stats->read_holdtime);
seq_puts(m, "\n");
}
if (stats->read_waittime.nr + stats->write_waittime.nr == 0)
return;
if (stats->read_holdtime.nr)
namelen += 2;
for (i = 0; i < LOCKSTAT_POINTS; i++) {
char ip[32];
if (class->contention_point[i] == 0)
break;
if (!i)
seq_line(m, '-', 40-namelen, namelen);
snprintf(ip, sizeof(ip), "[<%p>]",
(void *)class->contention_point[i]);
seq_printf(m, "%40s %14lu %29s %pS\n",
name, stats->contention_point[i],
ip, (void *)class->contention_point[i]);
}
for (i = 0; i < LOCKSTAT_POINTS; i++) {
char ip[32];
if (class->contending_point[i] == 0)
break;
if (!i)
seq_line(m, '-', 40-namelen, namelen);
snprintf(ip, sizeof(ip), "[<%p>]",
(void *)class->contending_point[i]);
seq_printf(m, "%40s %14lu %29s %pS\n",
name, stats->contending_point[i],
ip, (void *)class->contending_point[i]);
}
if (i) {
seq_puts(m, "\n");
seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1));
seq_puts(m, "\n");
}
}
static void seq_header(struct seq_file *m)
{
seq_puts(m, "lock_stat version 0.4\n");
if (unlikely(!debug_locks))
seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
"%14s %14s\n",
"class name",
"con-bounces",
"contentions",
"waittime-min",
"waittime-max",
"waittime-total",
"waittime-avg",
"acq-bounces",
"acquisitions",
"holdtime-min",
"holdtime-max",
"holdtime-total",
"holdtime-avg");
seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
seq_printf(m, "\n");
}
static void *ls_start(struct seq_file *m, loff_t *pos)
{
struct lock_stat_seq *data = m->private;
struct lock_stat_data *iter;
if (*pos == 0)
return SEQ_START_TOKEN;
iter = data->stats + (*pos - 1);
if (iter >= data->iter_end)
iter = NULL;
return iter;
}
static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return ls_start(m, pos);
}
static void ls_stop(struct seq_file *m, void *v)
{
}
static int ls_show(struct seq_file *m, void *v)
{
if (v == SEQ_START_TOKEN)
seq_header(m);
else
seq_stats(m, v);
return 0;
}
static const struct seq_operations lockstat_ops = {
.start = ls_start,
.next = ls_next,
.stop = ls_stop,
.show = ls_show,
};
static int lock_stat_open(struct inode *inode, struct file *file)
{
int res;
struct lock_class *class;
struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq));
if (!data)
return -ENOMEM;
res = seq_open(file, &lockstat_ops);
if (!res) {
struct lock_stat_data *iter = data->stats;
struct seq_file *m = file->private_data;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
iter->class = class;
iter->stats = lock_stats(class);
iter++;
}
data->iter_end = iter;
sort(data->stats, data->iter_end - data->stats,
sizeof(struct lock_stat_data),
lock_stat_cmp, NULL);
m->private = data;
} else
vfree(data);
return res;
}
static ssize_t lock_stat_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct lock_class *class;
char c;
if (count) {
if (get_user(c, buf))
return -EFAULT;
if (c != '0')
return count;
list_for_each_entry(class, &all_lock_classes, lock_entry)
clear_lock_stats(class);
}
return count;
}
static int lock_stat_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
vfree(seq->private);
return seq_release(inode, file);
}
static const struct file_operations proc_lock_stat_operations = {
.open = lock_stat_open,
.write = lock_stat_write,
.read = seq_read,
.llseek = seq_lseek,
.release = lock_stat_release,
};
#endif /* CONFIG_LOCK_STAT */
static int __init lockdep_proc_init(void)
{
proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
#ifdef CONFIG_PROVE_LOCKING
proc_create("lockdep_chains", S_IRUSR, NULL,
&proc_lockdep_chains_operations);
#endif
proc_create("lockdep_stats", S_IRUSR, NULL,
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
#endif
return 0;
}
__initcall(lockdep_proc_init);

View file

@ -0,0 +1,9 @@
/*
* Lockdep states,
*
* please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
* you add one, or come up with a nice dynamic solution.
*/
LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ)
LOCKDEP_STATE(RECLAIM_FS)

View file

@ -0,0 +1,815 @@
/*
* Module-based torture test facility for locking
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright (C) IBM Corporation, 2014
*
* Author: Paul E. McKenney <paulmck@us.ibm.com>
* Based on kernel/rcu/torture.c.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/rwlock.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/torture.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
torture_param(int, nwriters_stress, -1,
"Number of write-locking stress-test threads");
torture_param(int, nreaders_stress, -1,
"Number of read-locking stress-test threads");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0,
"Time between CPU hotplugs (s), 0=disable");
torture_param(int, shuffle_interval, 3,
"Number of jiffies between shuffles, 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(bool, verbose, true,
"Enable verbose debugging printk()s");
static char *torture_type = "spin_lock";
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type,
"Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
static struct task_struct *stats_task;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
static bool lock_is_write_held;
static bool lock_is_read_held;
struct lock_stress_stats {
long n_lock_fail;
long n_lock_acquired;
};
#if defined(MODULE)
#define LOCKTORTURE_RUNNABLE_INIT 1
#else
#define LOCKTORTURE_RUNNABLE_INIT 0
#endif
int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
module_param(torture_runnable, int, 0444);
MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
/* Forward reference. */
static void lock_torture_cleanup(void);
/*
* Operations vector for selecting different types of tests.
*/
struct lock_torture_ops {
void (*init)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
void (*writeunlock)(void);
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *trsp);
void (*readunlock)(void);
unsigned long flags;
const char *name;
};
struct lock_torture_cxt {
int nrealwriters_stress;
int nrealreaders_stress;
bool debug_lock;
atomic_t n_lock_torture_errors;
struct lock_torture_ops *cur_ops;
struct lock_stress_stats *lwsa; /* writer statistics */
struct lock_stress_stats *lrsa; /* reader statistics */
};
static struct lock_torture_cxt cxt = { 0, 0, false,
ATOMIC_INIT(0),
NULL, NULL};
/*
* Definitions for lock torture testing.
*/
static int torture_lock_busted_write_lock(void)
{
return 0; /* BUGGY, do not use in real life!!! */
}
static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_us = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_us)))
mdelay(longdelay_us);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_lock_busted_write_unlock(void)
{
/* BUGGY, do not use in real life!!! */
}
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "lock_busted"
};
static DEFINE_SPINLOCK(torture_spinlock);
static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
{
spin_lock(&torture_spinlock);
return 0;
}
static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
const unsigned long longdelay_us = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_us)))
mdelay(longdelay_us);
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2 * shortdelay_us)))
udelay(shortdelay_us);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
{
spin_unlock(&torture_spinlock);
}
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "spin_lock"
};
static int torture_spin_lock_write_lock_irq(void)
__acquires(torture_spinlock)
{
unsigned long flags;
spin_lock_irqsave(&torture_spinlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_lock_spin_write_unlock_irq(void)
__releases(torture_spinlock)
{
spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
}
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "spin_lock_irq"
};
static DEFINE_RWLOCK(torture_rwlock);
static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
{
write_lock(&torture_rwlock);
return 0;
}
static void torture_rwlock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
const unsigned long longdelay_ms = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
else
udelay(shortdelay_us);
}
static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
{
write_unlock(&torture_rwlock);
}
static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
{
read_lock(&torture_rwlock);
return 0;
}
static void torture_rwlock_read_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 10;
const unsigned long longdelay_ms = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealreaders_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
else
udelay(shortdelay_us);
}
static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
{
read_unlock(&torture_rwlock);
}
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
.readunlock = torture_rwlock_read_unlock,
.name = "rw_lock"
};
static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
{
unsigned long flags;
write_lock_irqsave(&torture_rwlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_rwlock_write_unlock_irq(void)
__releases(torture_rwlock)
{
write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
}
static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
{
unsigned long flags;
read_lock_irqsave(&torture_rwlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_rwlock_read_unlock_irq(void)
__releases(torture_rwlock)
{
write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
}
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
.readunlock = torture_rwlock_read_unlock_irq,
.name = "rw_lock_irq"
};
static DEFINE_MUTEX(torture_mutex);
static int torture_mutex_lock(void) __acquires(torture_mutex)
{
mutex_lock(&torture_mutex);
return 0;
}
static void torture_mutex_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 5);
else
mdelay(longdelay_ms / 5);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_mutex_unlock(void) __releases(torture_mutex)
{
mutex_unlock(&torture_mutex);
}
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "mutex_lock"
};
static DECLARE_RWSEM(torture_rwsem);
static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
{
down_write(&torture_rwsem);
return 0;
}
static void torture_rwsem_write_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 10);
else
mdelay(longdelay_ms / 10);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_rwsem_up_write(void) __releases(torture_rwsem)
{
up_write(&torture_rwsem);
}
static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
{
down_read(&torture_rwsem);
return 0;
}
static void torture_rwsem_read_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 2);
else
mdelay(longdelay_ms / 2);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_rwsem_up_read(void) __releases(torture_rwsem)
{
up_read(&torture_rwsem);
}
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
.readunlock = torture_rwsem_up_read,
.name = "rwsem_lock"
};
/*
* Lock torture writer kthread. Repeatedly acquires and releases
* the lock, checking for duplicate acquisitions.
*/
static int lock_torture_writer(void *arg)
{
struct lock_stress_stats *lwsp = arg;
static DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_writer task started");
set_user_nice(current, MAX_NICE);
do {
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
cxt.cur_ops->writelock();
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
lock_is_write_held = 1;
if (WARN_ON_ONCE(lock_is_read_held))
lwsp->n_lock_fail++; /* rare, but... */
lwsp->n_lock_acquired++;
cxt.cur_ops->write_delay(&rand);
lock_is_write_held = 0;
cxt.cur_ops->writeunlock();
stutter_wait("lock_torture_writer");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_writer");
return 0;
}
/*
* Lock torture reader kthread. Repeatedly acquires and releases
* the reader lock.
*/
static int lock_torture_reader(void *arg)
{
struct lock_stress_stats *lrsp = arg;
static DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_reader task started");
set_user_nice(current, MAX_NICE);
do {
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
cxt.cur_ops->readlock();
lock_is_read_held = 1;
if (WARN_ON_ONCE(lock_is_write_held))
lrsp->n_lock_fail++; /* rare, but... */
lrsp->n_lock_acquired++;
cxt.cur_ops->read_delay(&rand);
lock_is_read_held = 0;
cxt.cur_ops->readunlock();
stutter_wait("lock_torture_reader");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_reader");
return 0;
}
/*
* Create an lock-torture-statistics message in the specified buffer.
*/
static void __torture_print_stats(char *page,
struct lock_stress_stats *statp, bool write)
{
bool fail = 0;
int i, n_stress;
long max = 0;
long min = statp[0].n_lock_acquired;
long long sum = 0;
n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
for (i = 0; i < n_stress; i++) {
if (statp[i].n_lock_fail)
fail = true;
sum += statp[i].n_lock_acquired;
if (max < statp[i].n_lock_fail)
max = statp[i].n_lock_fail;
if (min > statp[i].n_lock_fail)
min = statp[i].n_lock_fail;
}
page += sprintf(page,
"%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
write ? "Writes" : "Reads ",
sum, max, min, max / 2 > min ? "???" : "",
fail, fail ? "!!!" : "");
if (fail)
atomic_inc(&cxt.n_lock_torture_errors);
}
/*
* Print torture statistics. Caller must ensure that there is only one
* call to this function at a given time!!! This is normally accomplished
* by relying on the module system to only have one copy of the module
* loaded, and then by giving the lock_torture_stats kthread full control
* (or the init/cleanup functions when lock_torture_stats thread is not
* running).
*/
static void lock_torture_stats_print(void)
{
int size = cxt.nrealwriters_stress * 200 + 8192;
char *buf;
if (cxt.cur_ops->readlock)
size += cxt.nrealreaders_stress * 200 + 8192;
buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
pr_err("lock_torture_stats_print: Out of memory, need: %d",
size);
return;
}
__torture_print_stats(buf, cxt.lwsa, true);
pr_alert("%s", buf);
kfree(buf);
if (cxt.cur_ops->readlock) {
buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
pr_err("lock_torture_stats_print: Out of memory, need: %d",
size);
return;
}
__torture_print_stats(buf, cxt.lrsa, false);
pr_alert("%s", buf);
kfree(buf);
}
}
/*
* Periodically prints torture statistics, if periodic statistics printing
* was specified via the stat_interval module parameter.
*
* No need to worry about fullstop here, since this one doesn't reference
* volatile state or register callbacks.
*/
static int lock_torture_stats(void *arg)
{
VERBOSE_TOROUT_STRING("lock_torture_stats task started");
do {
schedule_timeout_interruptible(stat_interval * HZ);
lock_torture_stats_print();
torture_shutdown_absorb("lock_torture_stats");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_stats");
return 0;
}
static inline void
lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
const char *tag)
{
pr_alert("%s" TORTURE_FLAG
"--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, cxt.debug_lock ? " [debug]": "",
cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
verbose, shuffle_interval, stutter, shutdown_secs,
onoff_interval, onoff_holdoff);
}
static void lock_torture_cleanup(void)
{
int i;
if (torture_cleanup_begin())
return;
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
torture_stop_kthread(lock_torture_writer,
writer_tasks[i]);
kfree(writer_tasks);
writer_tasks = NULL;
}
if (reader_tasks) {
for (i = 0; i < cxt.nrealreaders_stress; i++)
torture_stop_kthread(lock_torture_reader,
reader_tasks[i]);
kfree(reader_tasks);
reader_tasks = NULL;
}
torture_stop_kthread(lock_torture_stats, stats_task);
lock_torture_stats_print(); /* -After- the stats thread is stopped! */
if (atomic_read(&cxt.n_lock_torture_errors))
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: FAILURE");
else if (torture_onoff_failures())
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: LOCK_HOTPLUG");
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
torture_cleanup_end();
}
static int __init lock_torture_init(void)
{
int i, j;
int firsterr = 0;
static struct lock_torture_ops *torture_ops[] = {
&lock_busted_ops,
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
&rwsem_lock_ops,
};
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
return -EBUSY;
/* Process args and tell the world that the torturer is on the job. */
for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
cxt.cur_ops = torture_ops[i];
if (strcmp(torture_type, cxt.cur_ops->name) == 0)
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
pr_alert("lock-torture: invalid torture type: \"%s\"\n",
torture_type);
pr_alert("lock-torture types:");
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
pr_alert(" %s", torture_ops[i]->name);
pr_alert("\n");
torture_init_end();
return -EINVAL;
}
if (cxt.cur_ops->init)
cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;
else
cxt.nrealwriters_stress = 2 * num_online_cpus();
#ifdef CONFIG_DEBUG_MUTEXES
if (strncmp(torture_type, "mutex", 5) == 0)
cxt.debug_lock = true;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if ((strncmp(torture_type, "spin", 4) == 0) ||
(strncmp(torture_type, "rw_lock", 7) == 0))
cxt.debug_lock = true;
#endif
/* Initialize the statistics so that each run gets its own numbers. */
lock_is_write_held = 0;
cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
if (cxt.lwsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
for (i = 0; i < cxt.nrealwriters_stress; i++) {
cxt.lwsa[i].n_lock_fail = 0;
cxt.lwsa[i].n_lock_acquired = 0;
}
if (cxt.cur_ops->readlock) {
if (nreaders_stress >= 0)
cxt.nrealreaders_stress = nreaders_stress;
else {
/*
* By default distribute evenly the number of
* readers and writers. We still run the same number
* of threads as the writer-only locks default.
*/
if (nwriters_stress < 0) /* user doesn't care */
cxt.nrealwriters_stress = num_online_cpus();
cxt.nrealreaders_stress = cxt.nrealwriters_stress;
}
lock_is_read_held = 0;
cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
if (cxt.lrsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
kfree(cxt.lwsa);
goto unwind;
}
for (i = 0; i < cxt.nrealreaders_stress; i++) {
cxt.lrsa[i].n_lock_fail = 0;
cxt.lrsa[i].n_lock_acquired = 0;
}
}
lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
/* Prepare torture context. */
if (onoff_interval > 0) {
firsterr = torture_onoff_init(onoff_holdoff * HZ,
onoff_interval * HZ);
if (firsterr)
goto unwind;
}
if (shuffle_interval > 0) {
firsterr = torture_shuffle_init(shuffle_interval);
if (firsterr)
goto unwind;
}
if (shutdown_secs > 0) {
firsterr = torture_shutdown_init(shutdown_secs,
lock_torture_cleanup);
if (firsterr)
goto unwind;
}
if (stutter > 0) {
firsterr = torture_stutter_init(stutter);
if (firsterr)
goto unwind;
}
writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
GFP_KERNEL);
if (writer_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
if (cxt.cur_ops->readlock) {
reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
}
/*
* Create the kthreads and start torturing (oh, those poor little locks).
*
* TODO: Note that we interleave writers with readers, giving writers a
* slight advantage, by creating its kthread first. This can be modified
* for very specific needs, or even let the user choose the policy, if
* ever wanted.
*/
for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
j < cxt.nrealreaders_stress; i++, j++) {
if (i >= cxt.nrealwriters_stress)
goto create_reader;
/* Create writer. */
firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
writer_tasks[i]);
if (firsterr)
goto unwind;
create_reader:
if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
continue;
/* Create reader. */
firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
reader_tasks[j]);
if (firsterr)
goto unwind;
}
if (stat_interval > 0) {
firsterr = torture_create_kthread(lock_torture_stats, NULL,
stats_task);
if (firsterr)
goto unwind;
}
torture_init_end();
return 0;
unwind:
torture_init_end();
lock_torture_cleanup();
return firsterr;
}
module_init(lock_torture_init);
module_exit(lock_torture_cleanup);

View file

@ -0,0 +1,208 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include "mcs_spinlock.h"
#ifdef CONFIG_SMP
/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
* lock implementations (mutex, rwsem, etc).
*
* Using a single mcs node per CPU is safe because sleeping locks should not be
* called from interrupt context and we have preemption disabled while
* spinning.
*/
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
/*
* We use the value 0 to represent "no CPU", thus the encoded value
* will be the CPU number incremented by 1.
*/
static inline int encode_cpu(int cpu_nr)
{
return cpu_nr + 1;
}
static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
int cpu_nr = encoded_cpu_val - 1;
return per_cpu_ptr(&osq_node, cpu_nr);
}
/*
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
* Can return NULL in case we were the last queued and we updated @lock instead.
*/
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
struct optimistic_spin_node *node,
struct optimistic_spin_node *prev)
{
struct optimistic_spin_node *next = NULL;
int curr = encode_cpu(smp_processor_id());
int old;
/*
* If there is a prev node in queue, then the 'old' value will be
* the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
* we're currently last in queue, then the queue will then become empty.
*/
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
for (;;) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
* unlock()/unqueue().
*/
break;
}
/*
* We must xchg() the @node->next value, because if we were to
* leave it in, a concurrent unlock()/unqueue() from
* @node->next might complete Step-A and think its @prev is
* still valid.
*
* If the concurrent unlock()/unqueue() wins the race, we'll
* wait for either @lock to point to us, through its Step-B, or
* wait for a new @node->next from its Step-C.
*/
if (node->next) {
next = xchg(&node->next, NULL);
if (next)
break;
}
cpu_relax_lowlatency();
}
return next;
}
bool osq_lock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_node *prev, *next;
int curr = encode_cpu(smp_processor_id());
int old;
node->locked = 0;
node->next = NULL;
node->cpu = curr;
old = atomic_xchg(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
prev = decode_cpu(old);
node->prev = prev;
ACCESS_ONCE(prev->next) = node;
/*
* Normally @prev is untouchable after the above store; because at that
* moment unlock can proceed and wipe the node element from stack.
*
* However, since our nodes are static per-cpu storage, we're
* guaranteed their existence -- this allows us to apply
* cmpxchg in an attempt to undo our queueing.
*/
while (!smp_load_acquire(&node->locked)) {
/*
* If we need to reschedule bail... so we can block.
*/
if (need_resched())
goto unqueue;
cpu_relax_lowlatency();
}
return true;
unqueue:
/*
* Step - A -- stabilize @prev
*
* Undo our @prev->next assignment; this will make @prev's
* unlock()/unqueue() wait for a next pointer since @lock points to us
* (or later).
*/
for (;;) {
if (prev->next == node &&
cmpxchg(&prev->next, node, NULL) == node)
break;
/*
* We can only fail the cmpxchg() racing against an unlock(),
* in which case we should observe @node->locked becomming
* true.
*/
if (smp_load_acquire(&node->locked))
return true;
cpu_relax_lowlatency();
/*
* Or we race against a concurrent unqueue()'s step-B, in which
* case its step-C will write us a new @node->prev pointer.
*/
prev = ACCESS_ONCE(node->prev);
}
/*
* Step - B -- stabilize @next
*
* Similar to unlock(), wait for @node->next or move @lock from @node
* back to @prev.
*/
next = osq_wait_next(lock, node, prev);
if (!next)
return false;
/*
* Step - C -- unlink
*
* @prev is stable because its still waiting for a new @prev->next
* pointer, @next is stable because our @node->next pointer is NULL and
* it will wait in Step-A.
*/
ACCESS_ONCE(next->prev) = prev;
ACCESS_ONCE(prev->next) = next;
return false;
}
void osq_unlock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node, *next;
int curr = encode_cpu(smp_processor_id());
/*
* Fast path for the uncontended case.
*/
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
return;
/*
* Second most likely case.
*/
node = this_cpu_ptr(&osq_node);
next = xchg(&node->next, NULL);
if (next) {
ACCESS_ONCE(next->locked) = 1;
return;
}
next = osq_wait_next(lock, node, NULL);
if (next)
ACCESS_ONCE(next->locked) = 1;
}
#endif

View file

@ -0,0 +1,127 @@
/*
* MCS lock defines
*
* This file contains the main data structure and API definitions of MCS lock.
*
* The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
* with the desirable properties of being fair, and with each cpu trying
* to acquire the lock spinning on a local variable.
* It avoids expensive cache bouncings that common test-and-set spin-lock
* implementations incur.
*/
#ifndef __LINUX_MCS_SPINLOCK_H
#define __LINUX_MCS_SPINLOCK_H
#include <asm/mcs_spinlock.h>
struct mcs_spinlock {
struct mcs_spinlock *next;
int locked; /* 1 if lock acquired */
};
#ifndef arch_mcs_spin_lock_contended
/*
* Using smp_load_acquire() provides a memory barrier that ensures
* subsequent operations happen after the lock is acquired.
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
while (!(smp_load_acquire(l))) \
cpu_relax_lowlatency(); \
} while (0)
#endif
#ifndef arch_mcs_spin_unlock_contended
/*
* smp_store_release() provides a memory barrier to ensure all
* operations in the critical section has been completed before
* unlocking.
*/
#define arch_mcs_spin_unlock_contended(l) \
smp_store_release((l), 1)
#endif
/*
* Note: the smp_load_acquire/smp_store_release pair is not
* sufficient to form a full memory barrier across
* cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
* For applications that need a full barrier across multiple cpus
* with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
* used after mcs_lock.
*/
/*
* In order to acquire the lock, the caller should declare a local node and
* pass a reference of the node to this function in addition to the lock.
* If the lock has already been acquired, then this will proceed to spin
* on this node->locked until the previous lock holder sets the node->locked
* in mcs_spin_unlock().
*/
static inline
void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *prev;
/* Init node */
node->locked = 0;
node->next = NULL;
prev = xchg(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
* only spin on its own node->locked value for lock acquisition.
* However, since this thread can immediately acquire the lock
* and does not proceed to spin on its own node->locked, this
* value won't be used. If a debug mode is needed to
* audit lock status, then set node->locked value here.
*/
return;
}
ACCESS_ONCE(prev->next) = node;
/* Wait until the lock holder passes the lock down. */
arch_mcs_spin_lock_contended(&node->locked);
}
/*
* Releases the lock. The caller should pass in the corresponding node that
* was used to acquire the lock.
*/
static inline
void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *next = ACCESS_ONCE(node->next);
if (likely(!next)) {
/*
* Release the lock by setting it to NULL
*/
if (likely(cmpxchg(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
while (!(next = ACCESS_ONCE(node->next)))
cpu_relax_lowlatency();
}
/* Pass lock to next waiter. */
arch_mcs_spin_unlock_contended(&next->locked);
}
/*
* Cancellable version of the MCS lock above.
*
* Intended for adaptive spinning of sleeping locks:
* mutex_lock()/rwsem_down_{read,write}() etc.
*/
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # value */
};
extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);
#endif /* __LINUX_MCS_SPINLOCK_H */

View file

@ -0,0 +1,120 @@
/*
* kernel/mutex-debug.c
*
* Debugging code for mutexes
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* lock debugging, locking tree, deadlock detection started by:
*
* Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
* Released under the General Public License (GPL).
*/
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include "mutex-debug.h"
/*
* Must be called with lock->wait_lock held.
*/
void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
INIT_LIST_HEAD(&waiter->list);
}
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
{
DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
}
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
/* Mark the current thread as blocked on the lock: */
ti->task->blocked_on = waiter;
}
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
ti->task->blocked_on = NULL;
list_del_init(&waiter->list);
waiter->task = NULL;
}
void debug_mutex_unlock(struct mutex *lock)
{
if (likely(debug_locks)) {
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
if (!lock->owner)
DEBUG_LOCKS_WARN_ON(!lock->owner);
else
DEBUG_LOCKS_WARN_ON(lock->owner != current);
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
mutex_clear_owner(lock);
}
/*
* __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
* mutexes so that we can do it here after we've verified state.
*/
atomic_set(&lock->count, 1);
}
void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->magic = lock;
}
/***
* mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
* This function marks the mutex uninitialized, and any subsequent
* use of the mutex is forbidden. The mutex must not be locked when
* this function is called.
*/
void mutex_destroy(struct mutex *lock)
{
DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
lock->magic = NULL;
}
EXPORT_SYMBOL_GPL(mutex_destroy);

View file

@ -0,0 +1,55 @@
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal declarations,
* prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
* More details are in kernel/mutex-debug.c.
*/
/*
* This must be called with lock->wait_lock held.
*/
extern void debug_mutex_lock_common(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_wake_waiter(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
struct mutex_waiter *waiter,
struct thread_info *ti);
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti);
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
\
DEBUG_LOCKS_WARN_ON(in_interrupt()); \
local_irq_save(flags); \
arch_spin_lock(&(lock)->rlock.raw_lock);\
DEBUG_LOCKS_WARN_ON(l->magic != l); \
} while (0)
#define spin_unlock_mutex(lock, flags) \
do { \
arch_spin_unlock(&(lock)->rlock.raw_lock); \
local_irq_restore(flags); \
preempt_check_resched(); \
} while (0)

971
kernel/locking/mutex.c Normal file
View file

@ -0,0 +1,971 @@
/*
* kernel/locking/mutex.c
*
* Mutexes: blocking mutual exclusion locks
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
* David Howells for suggestions and improvements.
*
* - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
* from the -rt tree, where it was originally implemented for rtmutexes
* by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
* and Sven Dietrich.
*
* Also see Documentation/locking/mutex-design.txt.
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include "mcs_spinlock.h"
/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
* which forces all calls into the slowpath:
*/
#ifdef CONFIG_DEBUG_MUTEXES
# include "mutex-debug.h"
# include <asm-generic/mutex-null.h>
/*
* Must be 0 for the debug case so we do not do the unlock outside of the
* wait_lock region. debug_mutex_unlock() will do the actual unlock in this
* case.
*/
# undef __mutex_slowpath_needs_to_unlock
# define __mutex_slowpath_needs_to_unlock() 0
#else
# include "mutex.h"
# include <asm/mutex.h>
#endif
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
atomic_set(&lock->count, 1);
spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
mutex_clear_owner(lock);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
osq_lock_init(&lock->osq);
#endif
debug_mutex_init(lock, name, key);
}
EXPORT_SYMBOL(__mutex_init);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
* slowpath functions, to reduce the register pressure on the fastpath.
* We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken.
*/
__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
/**
* mutex_lock - acquire the mutex
* @lock: the mutex to be acquired
*
* Lock the mutex exclusively for this task. If the mutex is not
* available right now, it will sleep until it can get it.
*
* The mutex must later on be released by the same task that
* acquired it. Recursive locking is not allowed. The task
* may not exit without first unlocking the mutex. Also, kernel
* memory where the mutex resides mutex must not be freed with
* the mutex still locked. The mutex must first be initialized
* (or statically defined) before it can be locked. memset()-ing
* the mutex to 0 is not allowed.
*
* ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
* checks that will enforce the restrictions and will also do
* deadlock debugging. )
*
* This function is similar to (but not equivalent to) down().
*/
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
* The locking fastpath is the 1->0 transition from
* 'unlocked' into 'locked' state.
*/
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
mutex_set_owner(lock);
}
EXPORT_SYMBOL(mutex_lock);
#endif
static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
/*
* If this WARN_ON triggers, you used ww_mutex_lock to acquire,
* but released with a normal mutex_unlock in this call.
*
* This should never happen, always use ww_mutex_unlock.
*/
DEBUG_LOCKS_WARN_ON(ww->ctx);
/*
* Not quite done after calling ww_acquire_done() ?
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
if (ww_ctx->contending_lock) {
/*
* After -EDEADLK you tried to
* acquire a different ww_mutex? Bad!
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
/*
* You called ww_mutex_lock after receiving -EDEADLK,
* but 'forgot' to unlock everything else first?
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
ww_ctx->contending_lock = NULL;
}
/*
* Naughty, using a different class will lead to undefined behavior!
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
#endif
ww_ctx->acquired++;
}
/*
* after acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*
* This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
* as the fastpath and opportunistic spinning are disabled in that case.
*/
static __always_inline void
ww_mutex_set_context_fastpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
unsigned long flags;
struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
/*
* The lock->ctx update should be visible on all cores before
* the atomic read is done, otherwise contended waiters might be
* missed. The contended waiters will either see ww_ctx == NULL
* and keep spinning, or it will acquire wait_lock, add itself
* to waiter list and sleep.
*/
smp_mb(); /* ^^^ */
/*
* Check if lock is contended, if not there is nobody to wake up
*/
if (likely(atomic_read(&lock->base.count) == 0))
return;
/*
* Uh oh, we raced in fastpath, wake up everyone in this case,
* so they can see the new lock->ctx.
*/
spin_lock_mutex(&lock->base.wait_lock, flags);
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* In order to avoid a stampede of mutex spinners from acquiring the mutex
* more or less simultaneously, the spinners need to acquire a MCS lock
* first before spinning on the owner field.
*
*/
/*
* Mutex spinning code migrated from kernel/sched/core.c
*/
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
if (lock->owner != owner)
return false;
/*
* Ensure we emit the owner->on_cpu, dereference _after_ checking
* lock->owner still matches owner, if that fails, owner might
* point to free()d memory, if it still matches, the rcu_read_lock()
* ensures the memory stays valid.
*/
barrier();
return owner->on_cpu;
}
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
*/
static noinline
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
rcu_read_lock();
while (owner_running(lock, owner)) {
if (need_resched())
break;
cpu_relax_lowlatency();
}
rcu_read_unlock();
/*
* We break out the loop above on need_resched() and when the
* owner changed, which is a sign for heavy contention. Return
* success only when lock->owner is NULL.
*/
return lock->owner == NULL;
}
/*
* Initial check for entering the mutex spinning loop
*/
static inline int mutex_can_spin_on_owner(struct mutex *lock)
{
struct task_struct *owner;
int retval = 1;
if (need_resched())
return 0;
rcu_read_lock();
owner = ACCESS_ONCE(lock->owner);
if (owner)
retval = owner->on_cpu;
rcu_read_unlock();
/*
* if lock->owner is not set, the mutex owner may have just acquired
* it and not set the owner yet or the mutex has been released.
*/
return retval;
}
/*
* Atomically try to take the lock when it is available
*/
static inline bool mutex_try_to_acquire(struct mutex *lock)
{
return !mutex_is_locked(lock) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1);
}
/*
* Optimistic spinning.
*
* We try to spin for acquisition when we find that the lock owner
* is currently running on a (different) CPU and while we don't
* need to reschedule. The rationale is that if the lock owner is
* running, it is likely to release the lock soon.
*
* Since this needs the lock owner, and this mutex implementation
* doesn't track the owner atomically in the lock field, we need to
* track it non-atomically.
*
* We can't do this for DEBUG_MUTEXES because that relies on wait_lock
* to serialize everything.
*
* The mutex spinners are queued up using MCS lock so that only one
* spinner can compete for the mutex. However, if mutex spinning isn't
* going to happen, there is no point in going through the lock/unlock
* overhead.
*
* Returns true when the lock was taken, otherwise false, indicating
* that we need to jump to the slowpath and sleep.
*/
static bool mutex_optimistic_spin(struct mutex *lock,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
if (!mutex_can_spin_on_owner(lock))
goto done;
if (!osq_lock(&lock->osq))
goto done;
while (true) {
struct task_struct *owner;
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
/*
* If ww->ctx is set the contents are undefined, only
* by acquiring wait_lock there is a guarantee that
* they are not invalid when reading.
*
* As such, when deadlock detection needs to be
* performed the optimistic spinning cannot be done.
*/
if (ACCESS_ONCE(ww->ctx))
break;
}
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
owner = ACCESS_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner))
break;
/* Try to acquire the mutex if it is unlocked. */
if (mutex_try_to_acquire(lock)) {
lock_acquired(&lock->dep_map, ip);
if (use_ww_ctx) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
ww_mutex_set_context_fastpath(ww, ww_ctx);
}
mutex_set_owner(lock);
osq_unlock(&lock->osq);
return true;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(task)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax_lowlatency();
}
osq_unlock(&lock->osq);
done:
/*
* If we fell out of the spin path because of need_resched(),
* reschedule now, before we try-lock the mutex. This avoids getting
* scheduled out right after we obtained the mutex.
*/
if (need_resched())
schedule_preempt_disabled();
return false;
}
#else
static bool mutex_optimistic_spin(struct mutex *lock,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
return false;
}
#endif
__visible __used noinline
void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
/**
* mutex_unlock - release the mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously.
*
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(lock);
#endif
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
EXPORT_SYMBOL(mutex_unlock);
/**
* ww_mutex_unlock - release the w/w mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously with any of the
* ww_mutex_lock* functions (with or without an acquire context). It is
* forbidden to release the locks after releasing the acquire context.
*
* This function must not be used in interrupt context. Unlocking
* of a unlocked mutex is not allowed.
*/
void __sched ww_mutex_unlock(struct ww_mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
if (lock->ctx) {
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
#endif
if (lock->ctx->acquired > 0)
lock->ctx->acquired--;
lock->ctx = NULL;
}
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(&lock->base);
#endif
__mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
}
EXPORT_SYMBOL(ww_mutex_unlock);
static inline int __sched
__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
if (!hold_ctx)
return 0;
if (unlikely(ctx == hold_ctx))
return -EALREADY;
if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
(ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
ctx->contending_lock = ww;
#endif
return -EDEADLK;
}
return 0;
}
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
int ret;
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
/* got the lock, yay! */
preempt_enable();
return 0;
}
spin_lock_mutex(&lock->wait_lock, flags);
/*
* Once more, try to acquire the lock. Only try-lock the mutex if
* it is unlocked to reduce unnecessary xchg() operations.
*/
if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;
lock_contended(&lock->dep_map, ip);
for (;;) {
/*
* Lets try to take the lock again - this is needed even if
* we get here for the first time (shortly after failing to
* acquire the lock), to make sure that we get a wakeup once
* it's unlocked. Later on, if we sleep, this is the
* operation that gives us the lock. We xchg it to -1, so
* that when we release the lock, we properly wake up the
* other waiters. We only attempt the xchg if the count is
* non-negative in order to avoid unnecessary xchg operations:
*/
if (atomic_read(&lock->count) >= 0 &&
(atomic_xchg(&lock->count, -1) == 1))
break;
/*
* got a signal? (This code gets eliminated in the
* TASK_UNINTERRUPTIBLE case.)
*/
if (unlikely(signal_pending_state(state, task))) {
ret = -EINTR;
goto err;
}
if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
__set_task_state(task, state);
/* didn't get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
}
mutex_remove_waiter(lock, &waiter, current_thread_info());
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
debug_mutex_free_waiter(&waiter);
skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);
if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct mutex_waiter *cur;
/*
* This branch gets optimized out for the common case,
* and is only important for ww_mutex_lock.
*/
ww_mutex_lock_acquired(ww, ww_ctx);
ww->ctx = ww_ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->wait_list, list) {
debug_mutex_wake_waiter(lock, cur);
wake_up_process(cur->task);
}
}
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable();
return 0;
err:
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
preempt_enable();
return ret;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
void __sched
_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
0, nest, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_KILLABLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
unsigned tmp;
if (ctx->deadlock_inject_countdown-- == 0) {
tmp = ctx->deadlock_inject_interval;
if (tmp > UINT_MAX/4)
tmp = UINT_MAX;
else
tmp = tmp*2 + tmp + tmp/2;
ctx->deadlock_inject_interval = tmp;
ctx->deadlock_inject_countdown = tmp;
ctx->contending_lock = lock;
ww_mutex_unlock(lock);
return -EDEADLK;
}
#endif
return 0;
}
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock);
int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
#endif
/*
* Release the lock, slowpath:
*/
static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
/*
* As a performance measurement, release the lock before doing other
* wakeup related duties to follow. This allows other tasks to acquire
* the lock sooner, while still handling cleanups in past unlock calls.
* This can be done as we do not enforce strict equivalence between the
* mutex counter and wait_list.
*
*
* Some architectures leave the lock unlocked in the fastpath failure
* case, others need to leave it locked. In the later case we have to
* unlock it here - as the lock counter is currently 0 or negative.
*/
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
spin_lock_mutex(&lock->wait_lock, flags);
mutex_release(&lock->dep_map, nested, _RET_IP_);
debug_mutex_unlock(lock);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
list_entry(lock->wait_list.next,
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
wake_up_process(waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
}
/*
* Release the lock, slowpath:
*/
__visible void
__mutex_unlock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_unlock_common_slowpath(lock, 1);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
*/
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock);
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock);
/**
* mutex_lock_interruptible - acquire the mutex, interruptible
* @lock: the mutex to be acquired
*
* Lock the mutex like mutex_lock(), and return 0 if the mutex has
* been acquired or sleep until the mutex becomes available. If a
* signal arrives while waiting for the lock then this function
* returns -EINTR.
*
* This function is similar to (but not equivalent to) down_interruptible().
*/
int __sched mutex_lock_interruptible(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->count);
if (likely(!ret)) {
mutex_set_owner(lock);
return 0;
} else
return __mutex_lock_interruptible_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_interruptible);
int __sched mutex_lock_killable(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->count);
if (likely(!ret)) {
mutex_set_owner(lock);
return 0;
} else
return __mutex_lock_killable_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_killable);
__visible void __sched
__mutex_lock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_KILLABLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, ctx, 1);
}
static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
NULL, _RET_IP_, ctx, 1);
}
#endif
/*
* Spinlock based trylock, we take the spinlock and check whether we
* can get the lock:
*/
static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
int prev;
/* No need to trylock if the mutex is locked. */
if (mutex_is_locked(lock))
return 0;
spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
if (likely(prev == 1)) {
mutex_set_owner(lock);
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
}
/* Set it back to 0 if there are no waiters: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
spin_unlock_mutex(&lock->wait_lock, flags);
return prev == 1;
}
/**
* mutex_trylock - try to acquire the mutex, without waiting
* @lock: the mutex to be acquired
*
* Try to acquire the mutex atomically. Returns 1 if the mutex
* has been acquired successfully, and 0 on contention.
*
* NOTE: this function follows the spin_trylock() convention, so
* it is negated from the down_trylock() return values! Be careful
* about this when converting semaphore users to mutexes.
*
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
int __sched mutex_trylock(struct mutex *lock)
{
int ret;
ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
if (ret)
mutex_set_owner(lock);
return ret;
}
EXPORT_SYMBOL(mutex_trylock);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->base.count);
if (likely(!ret)) {
ww_mutex_set_context_fastpath(lock, ctx);
mutex_set_owner(&lock->base);
} else
ret = __ww_mutex_lock_slowpath(lock, ctx);
return ret;
}
EXPORT_SYMBOL(__ww_mutex_lock);
int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->base.count);
if (likely(!ret)) {
ww_mutex_set_context_fastpath(lock, ctx);
mutex_set_owner(&lock->base);
} else
ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
return ret;
}
EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
#endif
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic which we are to dec
* @lock: the mutex to return holding if we dec to 0
*
* return true and hold lock if we dec to 0, return false otherwise
*/
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
{
/* dec if we can't possibly hit 0 */
if (atomic_add_unless(cnt, -1, 1))
return 0;
/* we might hit 0, so take the lock */
mutex_lock(lock);
if (!atomic_dec_and_test(cnt)) {
/* when we actually did the dec, we didn't hit 0 */
mutex_unlock(lock);
return 0;
}
/* we hit 0, and we hold the lock */
return 1;
}
EXPORT_SYMBOL(atomic_dec_and_mutex_lock);

48
kernel/locking/mutex.h Normal file
View file

@ -0,0 +1,48 @@
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal prototypes, for the
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
#define spin_lock_mutex(lock, flags) \
do { spin_lock(lock); (void)(flags); } while (0)
#define spin_unlock_mutex(lock, flags) \
do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, ti) \
__list_del((waiter)->list.prev, (waiter)->list.next)
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#else
static inline void mutex_set_owner(struct mutex *lock)
{
}
static inline void mutex_clear_owner(struct mutex *lock)
{
}
#endif
#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
#define debug_mutex_free_waiter(waiter) do { } while (0)
#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
#define debug_mutex_unlock(lock) do { } while (0)
#define debug_mutex_init(lock, name, key) do { } while (0)
static inline void
debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
}

View file

@ -0,0 +1,165 @@
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>
int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
const char *name, struct lock_class_key *rwsem_key)
{
brw->fast_read_ctr = alloc_percpu(int);
if (unlikely(!brw->fast_read_ctr))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
__init_rwsem(&brw->rw_sem, name, rwsem_key);
atomic_set(&brw->write_ctr, 0);
atomic_set(&brw->slow_read_ctr, 0);
init_waitqueue_head(&brw->write_waitq);
return 0;
}
void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
{
free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */
}
/*
* This is the fast-path for down_read/up_read, it only needs to ensure
* there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
* fast per-cpu counter. The writer uses synchronize_sched_expedited() to
* serialize with the preempt-disabled section below.
*
* The nontrivial part is that we should guarantee acquire/release semantics
* in case when
*
* R_W: down_write() comes after up_read(), the writer should see all
* changes done by the reader
* or
* W_R: down_read() comes after up_write(), the reader should see all
* changes done by the writer
*
* If this helper fails the callers rely on the normal rw_semaphore and
* atomic_dec_and_test(), so in this case we have the necessary barriers.
*
* But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
* __this_cpu_add() below can be reordered with any LOAD/STORE done by the
* reader inside the critical section. See the comments in down_write and
* up_write below.
*/
static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
{
bool success = false;
preempt_disable();
if (likely(!atomic_read(&brw->write_ctr))) {
__this_cpu_add(*brw->fast_read_ctr, val);
success = true;
}
preempt_enable();
return success;
}
/*
* Like the normal down_read() this is not recursive, the writer can
* come after the first percpu_down_read() and create the deadlock.
*
* Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
* percpu_up_read() does rwsem_release(). This pairs with the usage
* of ->rw_sem in percpu_down/up_write().
*/
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
might_sleep();
if (likely(update_fast_ctr(brw, +1))) {
rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
return;
}
down_read(&brw->rw_sem);
atomic_inc(&brw->slow_read_ctr);
/* avoid up_read()->rwsem_release() */
__up_read(&brw->rw_sem);
}
void percpu_up_read(struct percpu_rw_semaphore *brw)
{
rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
if (likely(update_fast_ctr(brw, -1)))
return;
/* false-positive is possible but harmless */
if (atomic_dec_and_test(&brw->slow_read_ctr))
wake_up_all(&brw->write_waitq);
}
static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
{
unsigned int sum = 0;
int cpu;
for_each_possible_cpu(cpu) {
sum += per_cpu(*brw->fast_read_ctr, cpu);
per_cpu(*brw->fast_read_ctr, cpu) = 0;
}
return sum;
}
/*
* A writer increments ->write_ctr to force the readers to switch to the
* slow mode, note the atomic_read() check in update_fast_ctr().
*
* After that the readers can only inc/dec the slow ->slow_read_ctr counter,
* ->fast_read_ctr is stable. Once the writer moves its sum into the slow
* counter it represents the number of active readers.
*
* Finally the writer takes ->rw_sem for writing and blocks the new readers,
* then waits until the slow counter becomes zero.
*/
void percpu_down_write(struct percpu_rw_semaphore *brw)
{
/* tell update_fast_ctr() there is a pending writer */
atomic_inc(&brw->write_ctr);
/*
* 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
* so that update_fast_ctr() can't succeed.
*
* 2. Ensures we see the result of every previous this_cpu_add() in
* update_fast_ctr().
*
* 3. Ensures that if any reader has exited its critical section via
* fast-path, it executes a full memory barrier before we return.
* See R_W case in the comment above update_fast_ctr().
*/
synchronize_sched_expedited();
/* exclude other writers, and block the new readers completely */
down_write(&brw->rw_sem);
/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
/* wait for all readers to complete their percpu_up_read() */
wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}
void percpu_up_write(struct percpu_rw_semaphore *brw)
{
/* release the lock, but the readers can't use the fast-path */
up_write(&brw->rw_sem);
/*
* Insert the barrier before the next fast-path in down_read,
* see W_R case in the comment above update_fast_ctr().
*/
synchronize_sched_expedited();
/* the last writer unblocks update_fast_ctr() */
atomic_dec(&brw->write_ctr);
}

132
kernel/locking/qrwlock.c Normal file
View file

@ -0,0 +1,132 @@
/*
* Queue read/write lock
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>
*/
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/qrwlock.h>
/**
* rspin_until_writer_unlock - inc reader count & spin until writer is gone
* @lock : Pointer to queue rwlock structure
* @writer: Current queue rwlock writer status byte
*
* In interrupt context or at the head of the queue, the reader will just
* increment the reader count & wait until the writer releases the lock.
*/
static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
cpu_relax_lowlatency();
cnts = smp_load_acquire((u32 *)&lock->cnts);
}
}
/**
* queue_read_lock_slowpath - acquire read lock of a queue rwlock
* @lock: Pointer to queue rwlock structure
*/
void queue_read_lock_slowpath(struct qrwlock *lock)
{
u32 cnts;
/*
* Readers come here when they cannot get the lock without waiting
*/
if (unlikely(in_interrupt())) {
/*
* Readers in interrupt context will spin until the lock is
* available without waiting in the queue.
*/
cnts = smp_load_acquire((u32 *)&lock->cnts);
rspin_until_writer_unlock(lock, cnts);
return;
}
atomic_sub(_QR_BIAS, &lock->cnts);
/*
* Put the reader into the wait queue
*/
arch_spin_lock(&lock->lock);
/*
* At the head of the wait queue now, wait until the writer state
* goes to 0 and then try to increment the reader count and get
* the lock. It is possible that an incoming writer may steal the
* lock in the interim, so it is necessary to check the writer byte
* to make sure that the write lock isn't taken.
*/
while (atomic_read(&lock->cnts) & _QW_WMASK)
cpu_relax_lowlatency();
cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
rspin_until_writer_unlock(lock, cnts);
/*
* Signal the next one in queue to become queue head
*/
arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_read_lock_slowpath);
/**
* queue_write_lock_slowpath - acquire write lock of a queue rwlock
* @lock : Pointer to queue rwlock structure
*/
void queue_write_lock_slowpath(struct qrwlock *lock)
{
u32 cnts;
/* Put the writer into the wait queue */
arch_spin_lock(&lock->lock);
/* Try to acquire the lock directly if no reader is present */
if (!atomic_read(&lock->cnts) &&
(atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
goto unlock;
/*
* Set the waiting flag to notify readers that a writer is pending,
* or wait for a previous writer to go away.
*/
for (;;) {
cnts = atomic_read(&lock->cnts);
if (!(cnts & _QW_WMASK) &&
(atomic_cmpxchg(&lock->cnts, cnts,
cnts | _QW_WAITING) == cnts))
break;
cpu_relax_lowlatency();
}
/* When no more readers, set the locked flag */
for (;;) {
cnts = atomic_read(&lock->cnts);
if ((cnts == _QW_WAITING) &&
(atomic_cmpxchg(&lock->cnts, _QW_WAITING,
_QW_LOCKED) == _QW_WAITING))
break;
cpu_relax_lowlatency();
}
unlock:
arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_write_lock_slowpath);

View file

@ -0,0 +1,184 @@
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This code is based on the rt.c implementation in the preempt-rt tree.
* Portions of said code are
*
* Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey
* Copyright (C) 2006 Esben Nielsen
* Copyright (C) 2006 Kihon Technologies Inc.,
* Steven Rostedt <rostedt@goodmis.org>
*
* See rt.c in preempt-rt for proper credits and further information
*/
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/debug_locks.h>
#include "rtmutex_common.h"
static void printk_task(struct task_struct *p)
{
if (p)
printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
else
printk("<none>");
}
static void printk_lock(struct rt_mutex *lock, int print_owner)
{
if (lock->name)
printk(" [%p] {%s}\n",
lock, lock->name);
else
printk(" [%p] {%s:%d}\n",
lock, lock->file, lock->line);
if (print_owner && rt_mutex_owner(lock)) {
printk(".. ->owner: %p\n", lock->owner);
printk(".. held by: ");
printk_task(rt_mutex_owner(lock));
printk("\n");
}
}
void rt_mutex_debug_task_free(struct task_struct *task)
{
DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
/*
* We fill out the fields in the waiter to store the information about
* the deadlock. We print when we return. act_waiter can be NULL in
* case of a remove waiter operation.
*/
void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *act_waiter,
struct rt_mutex *lock)
{
struct task_struct *task;
if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter)
return;
task = rt_mutex_owner(act_waiter->lock);
if (task && task != current) {
act_waiter->deadlock_task_pid = get_pid(task_pid(task));
act_waiter->deadlock_lock = lock;
}
}
void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
{
struct task_struct *task;
if (!waiter->deadlock_lock || !debug_locks)
return;
rcu_read_lock();
task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
if (!task) {
rcu_read_unlock();
return;
}
if (!debug_locks_off()) {
rcu_read_unlock();
return;
}
printk("\n============================================\n");
printk( "[ BUG: circular locking deadlock detected! ]\n");
printk("%s\n", print_tainted());
printk( "--------------------------------------------\n");
printk("%s/%d is deadlocking current task %s/%d\n\n",
task->comm, task_pid_nr(task),
current->comm, task_pid_nr(current));
printk("\n1) %s/%d is trying to acquire this lock:\n",
current->comm, task_pid_nr(current));
printk_lock(waiter->lock, 1);
printk("\n2) %s/%d is blocked on this lock:\n",
task->comm, task_pid_nr(task));
printk_lock(waiter->deadlock_lock, 1);
debug_show_held_locks(current);
debug_show_held_locks(task);
printk("\n%s/%d's [blocked] stackdump:\n\n",
task->comm, task_pid_nr(task));
show_stack(task, NULL);
printk("\n%s/%d's [current] stackdump:\n\n",
current->comm, task_pid_nr(current));
dump_stack();
debug_show_all_locks();
rcu_read_unlock();
printk("[ turning off deadlock detection."
"Please report this trace. ]\n\n");
}
void debug_rt_mutex_lock(struct rt_mutex *lock)
{
}
void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current);
}
void
debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
{
}
void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock));
}
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
memset(waiter, 0x11, sizeof(*waiter));
waiter->deadlock_task_pid = NULL;
}
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
put_pid(waiter->deadlock_task_pid);
memset(waiter, 0x22, sizeof(*waiter));
}
void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
{
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lock->name = name;
}
void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}
void rt_mutex_deadlock_account_unlock(struct task_struct *task)
{
}

View file

@ -0,0 +1,39 @@
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains macros used solely by rtmutex.c. Debug version.
*/
extern void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
struct task_struct *powner);
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *waiter,
struct rt_mutex *lock);
extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
# define debug_rt_mutex_reset_waiter(w) \
do { (w)->deadlock_lock = NULL; } while (0)
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
enum rtmutex_chainwalk walk)
{
return (waiter != NULL);
}
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
{
debug_rt_mutex_print_deadlock(w);
}

View file

@ -0,0 +1,420 @@
/*
* RT-Mutex-tester: scriptable tester for rt mutexes
*
* started by Thomas Gleixner:
*
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
*/
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/stat.h>
#include "rtmutex.h"
#define MAX_RT_TEST_THREADS 8
#define MAX_RT_TEST_MUTEXES 8
static spinlock_t rttest_lock;
static atomic_t rttest_event;
struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
int event;
struct device dev;
};
static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
static struct task_struct *threads[MAX_RT_TEST_THREADS];
static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
enum test_opcodes {
RTTEST_NOP = 0,
RTTEST_SCHEDOT, /* 1 Sched other, data = nice */
RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */
RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */
RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
/* 9, 10 - reserved for BKL commemoration */
RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
};
static int handle_op(struct test_thread_data *td, int lockwakeup)
{
int i, id, ret = -EINVAL;
switch(td->opcode) {
case RTTEST_NOP:
return 0;
case RTTEST_LOCKCONT:
td->mutexes[td->opdata] = 1;
td->event = atomic_add_return(1, &rttest_event);
return 0;
case RTTEST_RESET:
for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
if (td->mutexes[i] == 4) {
rt_mutex_unlock(&mutexes[i]);
td->mutexes[i] = 0;
}
}
return 0;
case RTTEST_RESETEVENT:
atomic_set(&rttest_event, 0);
return 0;
default:
if (lockwakeup)
return ret;
}
switch(td->opcode) {
case RTTEST_LOCK:
case RTTEST_LOCKNOWAIT:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
return ret;
td->mutexes[id] = 1;
td->event = atomic_add_return(1, &rttest_event);
rt_mutex_lock(&mutexes[id]);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = 4;
return 0;
case RTTEST_LOCKINT:
case RTTEST_LOCKINTNOWAIT:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
return ret;
td->mutexes[id] = 1;
td->event = atomic_add_return(1, &rttest_event);
ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = ret ? 0 : 4;
return ret ? -EINTR : 0;
case RTTEST_UNLOCK:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
return ret;
td->event = atomic_add_return(1, &rttest_event);
rt_mutex_unlock(&mutexes[id]);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = 0;
return 0;
default:
break;
}
return ret;
}
/*
* Schedule replacement for rtsem_down(). Only called for threads with
* PF_MUTEX_TESTER set.
*
* This allows us to have finegrained control over the event flow.
*
*/
void schedule_rt_mutex_test(struct rt_mutex *mutex)
{
int tid, op, dat;
struct test_thread_data *td;
/* We have to lookup the task */
for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
if (threads[tid] == current)
break;
}
BUG_ON(tid == MAX_RT_TEST_THREADS);
td = &thread_data[tid];
op = td->opcode;
dat = td->opdata;
switch (op) {
case RTTEST_LOCK:
case RTTEST_LOCKINT:
case RTTEST_LOCKNOWAIT:
case RTTEST_LOCKINTNOWAIT:
if (mutex != &mutexes[dat])
break;
if (td->mutexes[dat] != 1)
break;
td->mutexes[dat] = 2;
td->event = atomic_add_return(1, &rttest_event);
break;
default:
break;
}
schedule();
switch (op) {
case RTTEST_LOCK:
case RTTEST_LOCKINT:
if (mutex != &mutexes[dat])
return;
if (td->mutexes[dat] != 2)
return;
td->mutexes[dat] = 3;
td->event = atomic_add_return(1, &rttest_event);
break;
case RTTEST_LOCKNOWAIT:
case RTTEST_LOCKINTNOWAIT:
if (mutex != &mutexes[dat])
return;
if (td->mutexes[dat] != 2)
return;
td->mutexes[dat] = 1;
td->event = atomic_add_return(1, &rttest_event);
return;
default:
return;
}
td->opcode = 0;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode > 0) {
int ret;
set_current_state(TASK_RUNNING);
ret = handle_op(td, 1);
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode == RTTEST_LOCKCONT)
break;
td->opcode = ret;
}
/* Wait for the next command to be executed */
schedule();
}
/* Restore previous command and data */
td->opcode = op;
td->opdata = dat;
}
static int test_func(void *data)
{
struct test_thread_data *td = data;
int ret;
current->flags |= PF_MUTEX_TESTER;
set_freezable();
allow_signal(SIGHUP);
for(;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode > 0) {
set_current_state(TASK_RUNNING);
ret = handle_op(td, 0);
set_current_state(TASK_INTERRUPTIBLE);
td->opcode = ret;
}
/* Wait for the next command to be executed */
schedule();
try_to_freeze();
if (signal_pending(current))
flush_signals(current);
if(kthread_should_stop())
break;
}
return 0;
}
/**
* sysfs_test_command - interface for test commands
* @dev: thread reference
* @buf: command for actual step
* @count: length of buffer
*
* command syntax:
*
* opcode:data
*/
static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct sched_param schedpar;
struct test_thread_data *td;
char cmdbuf[32];
int op, dat, tid, ret;
td = container_of(dev, struct test_thread_data, dev);
tid = td->dev.id;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(cmdbuf))
return -EINVAL;
/* strip of \n: */
if (buf[count-1] == '\n')
count--;
if (count < 1)
return -EINVAL;
memcpy(cmdbuf, buf, count);
cmdbuf[count] = 0;
if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
return -EINVAL;
switch (op) {
case RTTEST_SCHEDOT:
schedpar.sched_priority = 0;
ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
if (ret)
return ret;
set_user_nice(current, 0);
break;
case RTTEST_SCHEDRT:
schedpar.sched_priority = dat;
ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
if (ret)
return ret;
break;
case RTTEST_SIGNAL:
send_sig(SIGHUP, threads[tid], 0);
break;
default:
if (td->opcode > 0)
return -EBUSY;
td->opdata = dat;
td->opcode = op;
wake_up_process(threads[tid]);
}
return count;
}
/**
* sysfs_test_status - sysfs interface for rt tester
* @dev: thread to query
* @buf: char buffer to be filled with thread status info
*/
static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct test_thread_data *td;
struct task_struct *tsk;
char *curr = buf;
int i;
td = container_of(dev, struct test_thread_data, dev);
tsk = threads[td->dev.id];
spin_lock(&rttest_lock);
curr += sprintf(curr,
"O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
spin_unlock(&rttest_lock);
curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
mutexes[td->dev.id].owner);
return curr - buf;
}
static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
static struct bus_type rttest_subsys = {
.name = "rttest",
.dev_name = "rttest",
};
static int init_test_thread(int id)
{
thread_data[id].dev.bus = &rttest_subsys;
thread_data[id].dev.id = id;
threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
if (IS_ERR(threads[id]))
return PTR_ERR(threads[id]);
return device_register(&thread_data[id].dev);
}
static int init_rttest(void)
{
int ret, i;
spin_lock_init(&rttest_lock);
for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
rt_mutex_init(&mutexes[i]);
ret = subsys_system_register(&rttest_subsys, NULL);
if (ret)
return ret;
for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
ret = init_test_thread(i);
if (ret)
break;
ret = device_create_file(&thread_data[i].dev, &dev_attr_status);
if (ret)
break;
ret = device_create_file(&thread_data[i].dev, &dev_attr_command);
if (ret)
break;
}
printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
return ret;
}
device_initcall(init_rttest);

1646
kernel/locking/rtmutex.c Normal file

File diff suppressed because it is too large Load diff

36
kernel/locking/rtmutex.h Normal file
View file

@ -0,0 +1,36 @@
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains macros used solely by rtmutex.c.
* Non-debug version.
*/
#define rt_mutex_deadlock_check(l) (0)
#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
#define debug_rt_mutex_init_waiter(w) do { } while (0)
#define debug_rt_mutex_free_waiter(w) do { } while (0)
#define debug_rt_mutex_lock(l) do { } while (0)
#define debug_rt_mutex_proxy_lock(l,p) do { } while (0)
#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
#define debug_rt_mutex_unlock(l) do { } while (0)
#define debug_rt_mutex_init(m, n) do { } while (0)
#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
{
WARN(1, "rtmutex deadlock detected\n");
}
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w,
enum rtmutex_chainwalk walk)
{
return walk == RT_MUTEX_FULL_CHAINWALK;
}

View file

@ -0,0 +1,141 @@
/*
* RT Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains the private data structure and API definitions.
*/
#ifndef __KERNEL_RTMUTEX_COMMON_H
#define __KERNEL_RTMUTEX_COMMON_H
#include <linux/rtmutex.h>
/*
* The rtmutex in kernel tester is independent of rtmutex debugging. We
* call schedule_rt_mutex_test() instead of schedule() for the tasks which
* belong to the tester. That way we can delay the wakeup path of those
* threads to provoke lock stealing and testing of complex boosting scenarios.
*/
#ifdef CONFIG_RT_MUTEX_TESTER
extern void schedule_rt_mutex_test(struct rt_mutex *lock);
#define schedule_rt_mutex(_lock) \
do { \
if (!(current->flags & PF_MUTEX_TESTER)) \
schedule(); \
else \
schedule_rt_mutex_test(_lock); \
} while (0)
#else
# define schedule_rt_mutex(_lock) schedule()
#endif
/*
* This is the control structure for tasks blocked on a rt_mutex,
* which is allocated on the kernel stack on of the blocked task.
*
* @tree_entry: pi node to enqueue into the mutex waiters tree
* @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
* @task: task reference to the blocked task
*/
struct rt_mutex_waiter {
struct rb_node tree_entry;
struct rb_node pi_tree_entry;
struct task_struct *task;
struct rt_mutex *lock;
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
struct rt_mutex *deadlock_lock;
#endif
int prio;
};
/*
* Various helpers to access the waiters-tree:
*/
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
{
return !RB_EMPTY_ROOT(&lock->waiters);
}
static inline struct rt_mutex_waiter *
rt_mutex_top_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *w;
w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter,
tree_entry);
BUG_ON(w->lock != lock);
return w;
}
static inline int task_has_pi_waiters(struct task_struct *p)
{
return !RB_EMPTY_ROOT(&p->pi_waiters);
}
static inline struct rt_mutex_waiter *
task_top_pi_waiter(struct task_struct *p)
{
return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter,
pi_tree_entry);
}
/*
* lock->owner state tracking:
*/
#define RT_MUTEX_HAS_WAITERS 1UL
#define RT_MUTEX_OWNER_MASKALL 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
return (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
}
/*
* Constants for rt mutex functions which have a selectable deadlock
* detection.
*
* RT_MUTEX_MIN_CHAINWALK: Stops the lock chain walk when there are
* no further PI adjustments to be made.
*
* RT_MUTEX_FULL_CHAINWALK: Invoke deadlock detection with a full
* walk of the lock chain.
*/
enum rtmutex_chainwalk {
RT_MUTEX_MIN_CHAINWALK,
RT_MUTEX_FULL_CHAINWALK,
};
/*
* PI-futex support (proxy locking functions, etc.):
*/
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
#else
# include "rtmutex.h"
#endif
#endif

View file

@ -0,0 +1,296 @@
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
* generic spinlock implementation
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
* - Derived also from comments by Linus
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/export.h>
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
enum rwsem_waiter_type type;
};
int rwsem_is_locked(struct rw_semaphore *sem)
{
int ret = 1;
unsigned long flags;
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
ret = (sem->count != 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
return ret;
}
EXPORT_SYMBOL(rwsem_is_locked);
/*
* initialise the semaphore
*/
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = 0;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
EXPORT_SYMBOL(__init_rwsem);
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here, then:
* - the 'active count' _reached_ zero
* - the 'waiting count' is non-zero
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if wakewrite is non-zero
*/
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
int woken;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wakewrite)
/* Wake up a writer. Note that we do not grant it the
* lock - it will have to acquire it when it runs. */
wake_up_process(waiter->task);
goto out;
}
/* grant an infinite number of read locks to the front of the queue */
woken = 0;
do {
struct list_head *next = waiter->list.next;
list_del(&waiter->list);
tsk = waiter->task;
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
woken++;
if (next == &sem->wait_list)
break;
waiter = list_entry(next, struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
sem->count += woken;
out:
return sem;
}
/*
* wake a single writer
*/
static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
wake_up_process(waiter->task);
return sem;
}
/*
* get a read lock on the semaphore
*/
void __sched __down_read(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
tsk = current;
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(tsk);
list_add_tail(&waiter.list, &sem->wait_list);
/* we don't need to touch the semaphore struct anymore */
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
break;
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
tsk->state = TASK_RUNNING;
out:
;
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int __down_read_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* get a write lock on the semaphore
*/
void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
tsk = current;
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
/* wait for someone to release the lock */
for (;;) {
/*
* That is the key to support write lock stealing: allows the
* task already on CPU to get the lock soon rather than put
* itself into sleep and waiting for system woke it or someone
* else in the head of the wait list up.
*/
if (sem->count == 0)
break;
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
void __sched __down_write(struct rw_semaphore *sem)
{
__down_write_nested(sem, 0);
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int __down_write_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count == 0) {
/* got the lock */
sem->count = -1;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* release a read lock on the semaphore
*/
void __up_read(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (--sem->count == 0 && !list_empty(&sem->wait_list))
sem = __rwsem_wake_one_writer(sem);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* release a write lock on the semaphore
*/
void __up_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 0;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 1);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* downgrade a write lock into a read lock
* - just wake up any readers at the front of the queue
*/
void __downgrade_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 1;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}

514
kernel/locking/rwsem-xadd.c Normal file
View file

@ -0,0 +1,514 @@
/* rwsem.c: R/W semaphores: contention handling functions
*
* Written by David Howells (dhowells@redhat.com).
* Derived from arch/i386/kernel/semaphore.c
*
* Writer lock-stealing by Alex Shi <alex.shi@intel.com>
* and Michel Lespinasse <walken@google.com>
*
* Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
* and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
#include "mcs_spinlock.h"
/*
* Guide to the rw_semaphore's count field for common values.
* (32-bit case illustrated, similar for 64-bit)
*
* 0x0000000X (1) X readers active or attempting lock, no writer waiting
* X = #active_readers + #readers attempting to lock
* (X*ACTIVE_BIAS)
*
* 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
* attempting to read lock or write lock.
*
* 0xffff000X (1) X readers active or attempting lock, with waiters for lock
* X = #active readers + # readers attempting lock
* (X*ACTIVE_BIAS + WAITING_BIAS)
* (2) 1 writer attempting lock, no waiters for lock
* X-1 = #active readers + #readers attempting lock
* ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
* (3) 1 writer active, no waiters for lock
* X-1 = #active readers + #readers attempting lock
* ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
*
* 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
* (WAITING_BIAS + ACTIVE_BIAS)
* (2) 1 writer active or attempting lock, no waiters for lock
* (ACTIVE_WRITE_BIAS)
*
* 0xffff0000 (1) There are writers or readers queued but none active
* or in the process of attempting lock.
* (WAITING_BIAS)
* Note: writer can attempt to steal lock for this count by adding
* ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
*
* 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
* (ACTIVE_WRITE_BIAS + WAITING_BIAS)
*
* Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
* the count becomes more than 0 for successful lock acquisition,
* i.e. the case where there are only readers or nobody has lock.
* (1st and 2nd case above).
*
* Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
* checking the count becomes ACTIVE_WRITE_BIAS for successful lock
* acquisition (i.e. nobody else has lock or attempts lock). If
* unsuccessful, in rwsem_down_write_failed, we'll check to see if there
* are only waiters but none active (5th case above), and attempt to
* steal the lock.
*
*/
/*
* Initialize an rwsem:
*/
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = RWSEM_UNLOCKED_VALUE;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
sem->owner = NULL;
osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
enum rwsem_waiter_type type;
};
enum rwsem_wake_type {
RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
RWSEM_WAKE_READERS, /* Wake readers only */
RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
};
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then:
* - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
* - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
* - there must be someone on the queue
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if downgrading is false
*/
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
struct list_head *next;
long oldcount, woken, loop, adjustment;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY)
/* Wake writer at the front of the queue, but do not
* grant it the lock yet as we want other writers
* to be able to steal it. Readers, on the other hand,
* will block as they will notice the queued writer.
*/
wake_up_process(waiter->task);
goto out;
}
/* Writers might steal the lock before we grant it to the next reader.
* We prefer to do the first reader grant before counting readers
* so we can bail out early if a writer stole the lock.
*/
adjustment = 0;
if (wake_type != RWSEM_WAKE_READ_OWNED) {
adjustment = RWSEM_ACTIVE_READ_BIAS;
try_reader_grant:
oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
/* A writer stole the lock. Undo our reader grant. */
if (rwsem_atomic_update(-adjustment, sem) &
RWSEM_ACTIVE_MASK)
goto out;
/* Last active locker left. Retry waking readers. */
goto try_reader_grant;
}
}
/* Grant an infinite number of read locks to the readers at the front
* of the queue. Note we increment the 'active part' of the count by
* the number of readers before waking any processes up.
*/
woken = 0;
do {
woken++;
if (waiter->list.next == &sem->wait_list)
break;
waiter = list_entry(waiter->list.next,
struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
if (waiter->type != RWSEM_WAITING_FOR_WRITE)
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
if (adjustment)
rwsem_atomic_add(adjustment, sem);
next = sem->wait_list.next;
loop = woken;
do {
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
} while (--loop);
sem->wait_list.next = next;
next->prev = &sem->wait_list;
out:
return sem;
}
/*
* Wait for the read lock to be granted
*/
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
struct task_struct *tsk = current;
/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(tsk);
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list))
adjustment += RWSEM_WAITING_BIAS;
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = rwsem_atomic_update(adjustment, sem);
/* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
raw_spin_unlock_irq(&sem->wait_lock);
/* wait to be given the lock */
while (true) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!waiter.task)
break;
schedule();
}
tsk->state = TASK_RUNNING;
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
/*
* Try acquiring the write lock. Check count first in order
* to reduce unnecessary expensive cmpxchg() operations.
*/
if (count == RWSEM_WAITING_BIAS &&
cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
if (!list_is_singular(&sem->wait_list))
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
return true;
}
return false;
}
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* Try to acquire write lock before the writer has been put on wait queue.
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
long old, count = ACCESS_ONCE(sem->count);
while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count)
return true;
count = old;
}
}
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool on_cpu = false;
if (need_resched())
return false;
rcu_read_lock();
owner = ACCESS_ONCE(sem->owner);
if (owner)
on_cpu = owner->on_cpu;
rcu_read_unlock();
/*
* If sem->owner is not set, yet we have just recently entered the
* slowpath, then there is a possibility reader(s) may have the lock.
* To be safe, avoid spinning in these situations.
*/
return on_cpu;
}
static inline bool owner_running(struct rw_semaphore *sem,
struct task_struct *owner)
{
if (sem->owner != owner)
return false;
/*
* Ensure we emit the owner->on_cpu, dereference _after_ checking
* sem->owner still matches owner, if that fails, owner might
* point to free()d memory, if it still matches, the rcu_read_lock()
* ensures the memory stays valid.
*/
barrier();
return owner->on_cpu;
}
static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
rcu_read_lock();
while (owner_running(sem, owner)) {
if (need_resched())
break;
cpu_relax_lowlatency();
}
rcu_read_unlock();
/*
* We break out the loop above on need_resched() or when the
* owner changed, which is a sign for heavy contention. Return
* success only when sem->owner is NULL.
*/
return sem->owner == NULL;
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool taken = false;
preempt_disable();
/* sem->wait_lock should not be held when doing optimistic spinning */
if (!rwsem_can_spin_on_owner(sem))
goto done;
if (!osq_lock(&sem->osq))
goto done;
while (true) {
owner = ACCESS_ONCE(sem->owner);
if (owner && !rwsem_spin_on_owner(sem, owner))
break;
/* wait_lock will be acquired if write_lock is obtained */
if (rwsem_try_write_lock_unqueued(sem)) {
taken = true;
break;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(current)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax_lowlatency();
}
osq_unlock(&sem->osq);
done:
preempt_enable();
return taken;
}
#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
return false;
}
#endif
/*
* Wait until we successfully acquire the write lock
*/
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
long count;
bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
/* undo write bias from down_write operation, stop active locking */
count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
/* do optimistic spinning and steal lock if possible */
if (rwsem_optimistic_spin(sem))
return sem;
/*
* Optimistic spinning failed, proceed to the slowpath
* and block until we can acquire the sem.
*/
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
raw_spin_lock_irq(&sem->wait_lock);
/* account for this before adding a new element to the list */
if (list_empty(&sem->wait_list))
waiting = false;
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
if (waiting) {
count = ACCESS_ONCE(sem->count);
/*
* If there were already threads queued before us and there are
* no active writers, the lock must be read owned; so we try to
* wake any read locks that were queued ahead of us.
*/
if (count > RWSEM_WAITING_BIAS)
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
} else
count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
/* wait until we successfully acquire the lock */
set_current_state(TASK_UNINTERRUPTIBLE);
while (true) {
if (rwsem_try_write_lock(count, sem))
break;
raw_spin_unlock_irq(&sem->wait_lock);
/* Block until there are no active lockers. */
do {
schedule();
set_current_state(TASK_UNINTERRUPTIBLE);
} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
return sem;
}
EXPORT_SYMBOL(rwsem_down_write_failed);
/*
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
EXPORT_SYMBOL(rwsem_wake);
/*
* downgrade a write lock into a read lock
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);

186
kernel/locking/rwsem.c Normal file
View file

@ -0,0 +1,186 @@
/* kernel/rwsem.c: R/W semaphores, public implementation
*
* Written by David Howells (dhowells@redhat.com).
* Derived from asm-i386/semaphore.h
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
sem->owner = current;
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
sem->owner = NULL;
}
#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
}
#endif
/*
* lock for reading
*/
void __sched down_read(struct rw_semaphore *sem)
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read);
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
if (ret == 1)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
return ret;
}
EXPORT_SYMBOL(down_read_trylock);
/*
* lock for writing
*/
void __sched down_write(struct rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
if (ret == 1) {
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_owner(sem);
}
return ret;
}
EXPORT_SYMBOL(down_write_trylock);
/*
* release a read lock
*/
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
__up_read(sem);
}
EXPORT_SYMBOL(up_read);
/*
* release a write lock
*/
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
rwsem_clear_owner(sem);
__up_write(sem);
}
EXPORT_SYMBOL(up_write);
/*
* downgrade write lock to read lock
*/
void downgrade_write(struct rw_semaphore *sem)
{
/*
* lockdep: a downgraded write will live on as a write
* dependency.
*/
rwsem_clear_owner(sem);
__downgrade_write(sem);
}
EXPORT_SYMBOL(downgrade_write);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void down_read_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read_nested);
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
might_sleep();
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
void down_read_non_owner(struct rw_semaphore *sem)
{
might_sleep();
__down_read(sem);
}
EXPORT_SYMBOL(down_read_non_owner);
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
__up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);
#endif

263
kernel/locking/semaphore.c Normal file
View file

@ -0,0 +1,263 @@
/*
* Copyright (c) 2008 Intel Corporation
* Author: Matthew Wilcox <willy@linux.intel.com>
*
* Distributed under the terms of the GNU GPL, version 2
*
* This file implements counting semaphores.
* A counting semaphore may be acquired 'n' times before sleeping.
* See mutex.c for single-acquisition sleeping locks which enforce
* rules which allow code to be debugged more easily.
*/
/*
* Some notes on the implementation:
*
* The spinlock controls access to the other members of the semaphore.
* down_trylock() and up() can be called from interrupt context, so we
* have to disable interrupts when taking the lock. It turns out various
* parts of the kernel expect to be able to use down() on a semaphore in
* interrupt context when they know it will succeed, so we have to use
* irqsave variants for down(), down_interruptible() and down_killable()
* too.
*
* The ->count variable represents how many more tasks can acquire this
* semaphore. If it's zero, there may be tasks waiting on the wait_list.
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long timeout);
static noinline void __up(struct semaphore *sem);
/**
* down - acquire the semaphore
* @sem: the semaphore to be acquired
*
* Acquires the semaphore. If no more tasks are allowed to acquire the
* semaphore, calling this function will put the task to sleep until the
* semaphore is released.
*
* Use of this function is deprecated, please use down_interruptible() or
* down_killable() instead.
*/
void down(struct semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
__down(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(down);
/**
* down_interruptible - acquire the semaphore unless interrupted
* @sem: the semaphore to be acquired
*
* Attempts to acquire the semaphore. If no more tasks are allowed to
* acquire the semaphore, calling this function will put the task to sleep.
* If the sleep is interrupted by a signal, this function will return -EINTR.
* If the semaphore is successfully acquired, this function returns 0.
*/
int down_interruptible(struct semaphore *sem)
{
unsigned long flags;
int result = 0;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_interruptible(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
EXPORT_SYMBOL(down_interruptible);
/**
* down_killable - acquire the semaphore unless killed
* @sem: the semaphore to be acquired
*
* Attempts to acquire the semaphore. If no more tasks are allowed to
* acquire the semaphore, calling this function will put the task to sleep.
* If the sleep is interrupted by a fatal signal, this function will return
* -EINTR. If the semaphore is successfully acquired, this function returns
* 0.
*/
int down_killable(struct semaphore *sem)
{
unsigned long flags;
int result = 0;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_killable(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
EXPORT_SYMBOL(down_killable);
/**
* down_trylock - try to acquire the semaphore, without waiting
* @sem: the semaphore to be acquired
*
* Try to acquire the semaphore atomically. Returns 0 if the semaphore has
* been acquired successfully or 1 if it it cannot be acquired.
*
* NOTE: This return value is inverted from both spin_trylock and
* mutex_trylock! Be careful about this when converting code.
*
* Unlike mutex_trylock, this function can be used from interrupt context,
* and the semaphore can be released by any task or interrupt.
*/
int down_trylock(struct semaphore *sem)
{
unsigned long flags;
int count;
raw_spin_lock_irqsave(&sem->lock, flags);
count = sem->count - 1;
if (likely(count >= 0))
sem->count = count;
raw_spin_unlock_irqrestore(&sem->lock, flags);
return (count < 0);
}
EXPORT_SYMBOL(down_trylock);
/**
* down_timeout - acquire the semaphore within a specified time
* @sem: the semaphore to be acquired
* @timeout: how long to wait before failing
*
* Attempts to acquire the semaphore. If no more tasks are allowed to
* acquire the semaphore, calling this function will put the task to sleep.
* If the semaphore is not released within the specified number of jiffies,
* this function returns -ETIME. It returns 0 if the semaphore was acquired.
*/
int down_timeout(struct semaphore *sem, long timeout)
{
unsigned long flags;
int result = 0;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_timeout(sem, timeout);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
EXPORT_SYMBOL(down_timeout);
/**
* up - release the semaphore
* @sem: the semaphore to release
*
* Release the semaphore. Unlike mutexes, up() may be called from any
* context and even by tasks which have never called down().
*/
void up(struct semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(list_empty(&sem->wait_list)))
sem->count++;
else
__up(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(up);
/* Functions for the contended case */
struct semaphore_waiter {
struct list_head list;
struct task_struct *task;
bool up;
};
/*
* Because this function is inlined, the 'state' parameter will be
* constant, and thus optimised away by the compiler. Likewise the
* 'timeout' parameter for the cases without timeouts.
*/
static inline int __sched __down_common(struct semaphore *sem, long state,
long timeout)
{
struct task_struct *task = current;
struct semaphore_waiter waiter;
list_add_tail(&waiter.list, &sem->wait_list);
waiter.task = task;
waiter.up = false;
for (;;) {
if (signal_pending_state(state, task))
goto interrupted;
if (unlikely(timeout <= 0))
goto timed_out;
__set_task_state(task, state);
raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->lock);
if (waiter.up)
return 0;
}
timed_out:
list_del(&waiter.list);
return -ETIME;
interrupted:
list_del(&waiter.list);
return -EINTR;
}
static noinline void __sched __down(struct semaphore *sem)
{
__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
static noinline int __sched __down_interruptible(struct semaphore *sem)
{
return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
static noinline int __sched __down_killable(struct semaphore *sem)
{
return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
}
static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
{
return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
}
static noinline void __sched __up(struct semaphore *sem)
{
struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
struct semaphore_waiter, list);
list_del(&waiter->list);
waiter->up = true;
wake_up_process(waiter->task);
}

399
kernel/locking/spinlock.c Normal file
View file

@ -0,0 +1,399 @@
/*
* Copyright (2004) Linus Torvalds
*
* Author: Zwane Mwaikambo <zwane@fsmlabs.com>
*
* Copyright (2004, 2005) Ingo Molnar
*
* This file contains the spinlock/rwlock implementations for the
* SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
*
* Note that some architectures have special knowledge about the
* stack frames of these functions in their profile_pc. If you
* change anything significant here that could change the stack
* frame contact the architecture maintainers.
*/
#include <linux/linkage.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include <linux/export.h>
/*
* If lockdep is enabled then we use the non-preemption spin-ops
* even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
* not re-enabled during lock-acquire (which the preempt-spin-ops do):
*/
#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
/*
* The __lock_function inlines are taken from
* include/linux/spinlock_api_smp.h
*/
#else
#define raw_read_can_lock(l) read_can_lock(l)
#define raw_write_can_lock(l) write_can_lock(l)
/*
* Some architectures can relax in favour of the CPU owning the lock.
*/
#ifndef arch_read_relax
# define arch_read_relax(l) cpu_relax()
#endif
#ifndef arch_write_relax
# define arch_write_relax(l) cpu_relax()
#endif
#ifndef arch_spin_relax
# define arch_spin_relax(l) cpu_relax()
#endif
/*
* We build the __lock_function inlines here. They are too large for
* inlining all over the place, but here is only one user per function
* which embedds them into the calling _lock_function below.
*
* This could be a long-held lock. We both prepare to spin for a long
* time (making _this_ CPU preemptable if possible), and we also signal
* towards that other CPU that it should break the lock ASAP.
*/
#define BUILD_LOCK_OPS(op, locktype) \
void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
{ \
for (;;) { \
preempt_disable(); \
if (likely(do_raw_##op##_trylock(lock))) \
break; \
preempt_enable(); \
\
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
arch_##op##_relax(&lock->raw_lock); \
} \
(lock)->break_lock = 0; \
} \
\
unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
{ \
unsigned long flags; \
\
for (;;) { \
preempt_disable(); \
local_irq_save(flags); \
if (likely(do_raw_##op##_trylock(lock))) \
break; \
local_irq_restore(flags); \
preempt_enable(); \
\
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
arch_##op##_relax(&lock->raw_lock); \
} \
(lock)->break_lock = 0; \
return flags; \
} \
\
void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
{ \
_raw_##op##_lock_irqsave(lock); \
} \
\
void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
{ \
unsigned long flags; \
\
/* */ \
/* Careful: we must exclude softirqs too, hence the */ \
/* irq-disabling. We use the generic preemption-aware */ \
/* function: */ \
/**/ \
flags = _raw_##op##_lock_irqsave(lock); \
local_bh_disable(); \
local_irq_restore(flags); \
} \
/*
* Build preemption-friendly versions of the following
* lock-spinning functions:
*
* __[spin|read|write]_lock()
* __[spin|read|write]_lock_irq()
* __[spin|read|write]_lock_irqsave()
* __[spin|read|write]_lock_bh()
*/
BUILD_LOCK_OPS(spin, raw_spinlock);
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
#endif
#ifndef CONFIG_INLINE_SPIN_TRYLOCK
int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
{
return __raw_spin_trylock(lock);
}
EXPORT_SYMBOL(_raw_spin_trylock);
#endif
#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
{
return __raw_spin_trylock_bh(lock);
}
EXPORT_SYMBOL(_raw_spin_trylock_bh);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
__raw_spin_lock(lock);
}
EXPORT_SYMBOL(_raw_spin_lock);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
return __raw_spin_lock_irqsave(lock);
}
EXPORT_SYMBOL(_raw_spin_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
{
__raw_spin_lock_irq(lock);
}
EXPORT_SYMBOL(_raw_spin_lock_irq);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_BH
void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
__raw_spin_lock_bh(lock);
}
EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif
#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
__raw_spin_unlock(lock);
}
EXPORT_SYMBOL(_raw_spin_unlock);
#endif
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
#endif
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
{
__raw_spin_unlock_irq(lock);
}
EXPORT_SYMBOL(_raw_spin_unlock_irq);
#endif
#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
{
__raw_spin_unlock_bh(lock);
}
EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif
#ifndef CONFIG_INLINE_READ_TRYLOCK
int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
return __raw_read_trylock(lock);
}
EXPORT_SYMBOL(_raw_read_trylock);
#endif
#ifndef CONFIG_INLINE_READ_LOCK
void __lockfunc _raw_read_lock(rwlock_t *lock)
{
__raw_read_lock(lock);
}
EXPORT_SYMBOL(_raw_read_lock);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
{
return __raw_read_lock_irqsave(lock);
}
EXPORT_SYMBOL(_raw_read_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_IRQ
void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
{
__raw_read_lock_irq(lock);
}
EXPORT_SYMBOL(_raw_read_lock_irq);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_BH
void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
{
__raw_read_lock_bh(lock);
}
EXPORT_SYMBOL(_raw_read_lock_bh);
#endif
#ifndef CONFIG_INLINE_READ_UNLOCK
void __lockfunc _raw_read_unlock(rwlock_t *lock)
{
__raw_read_unlock(lock);
}
EXPORT_SYMBOL(_raw_read_unlock);
#endif
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_read_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
#endif
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
{
__raw_read_unlock_irq(lock);
}
EXPORT_SYMBOL(_raw_read_unlock_irq);
#endif
#ifndef CONFIG_INLINE_READ_UNLOCK_BH
void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
{
__raw_read_unlock_bh(lock);
}
EXPORT_SYMBOL(_raw_read_unlock_bh);
#endif
#ifndef CONFIG_INLINE_WRITE_TRYLOCK
int __lockfunc _raw_write_trylock(rwlock_t *lock)
{
return __raw_write_trylock(lock);
}
EXPORT_SYMBOL(_raw_write_trylock);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK
void __lockfunc _raw_write_lock(rwlock_t *lock)
{
__raw_write_lock(lock);
}
EXPORT_SYMBOL(_raw_write_lock);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
{
return __raw_write_lock_irqsave(lock);
}
EXPORT_SYMBOL(_raw_write_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
{
__raw_write_lock_irq(lock);
}
EXPORT_SYMBOL(_raw_write_lock_irq);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_BH
void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
{
__raw_write_lock_bh(lock);
}
EXPORT_SYMBOL(_raw_write_lock_bh);
#endif
#ifndef CONFIG_INLINE_WRITE_UNLOCK
void __lockfunc _raw_write_unlock(rwlock_t *lock)
{
__raw_write_unlock(lock);
}
EXPORT_SYMBOL(_raw_write_unlock);
#endif
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_write_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
#endif
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
{
__raw_write_unlock_irq(lock);
}
EXPORT_SYMBOL(_raw_write_unlock_irq);
#endif
#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
{
__raw_write_unlock_bh(lock);
}
EXPORT_SYMBOL(_raw_write_unlock_bh);
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
{
preempt_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
EXPORT_SYMBOL(_raw_spin_lock_nested);
unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
int subclass)
{
unsigned long flags;
local_irq_save(flags);
preempt_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
do_raw_spin_lock_flags, &flags);
return flags;
}
EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
struct lockdep_map *nest_lock)
{
preempt_disable();
spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
#endif
notrace int in_lock_functions(unsigned long addr)
{
/* Linker adds these: start and end of __lockfunc functions */
extern char __lock_text_start[], __lock_text_end[];
return addr >= (unsigned long)__lock_text_start
&& addr < (unsigned long)__lock_text_end;
}
EXPORT_SYMBOL(in_lock_functions);

View file

@ -0,0 +1,302 @@
/*
* Copyright 2005, Red Hat, Inc., Ingo Molnar
* Released under the General Public License (GPL).
*
* This file contains the spinlock/rwlock implementations for
* DEBUG_SPINLOCK.
*/
#include <linux/spinlock.h>
#include <linux/nmi.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include <linux/delay.h>
#include <linux/export.h>
void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
lock->magic = SPINLOCK_MAGIC;
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}
EXPORT_SYMBOL(__raw_spin_lock_init);
void __rwlock_init(rwlock_t *lock, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
lock->magic = RWLOCK_MAGIC;
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}
EXPORT_SYMBOL(__rwlock_init);
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
struct task_struct *owner = NULL;
if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
owner = lock->owner;
printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
msg, raw_smp_processor_id(),
current->comm, task_pid_nr(current));
printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
".owner_cpu: %d\n",
lock, lock->magic,
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
lock->owner_cpu);
dump_stack();
}
static void spin_bug(raw_spinlock_t *lock, const char *msg)
{
if (!debug_locks_off())
return;
spin_dump(lock, msg);
}
#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
static inline void
debug_spin_lock_before(raw_spinlock_t *lock)
{
SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
SPIN_BUG_ON(lock->owner == current, lock, "recursion");
SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
lock, "cpu recursion");
}
static inline void debug_spin_lock_after(raw_spinlock_t *lock)
{
lock->owner_cpu = raw_smp_processor_id();
lock->owner = current;
}
static inline void debug_spin_unlock(raw_spinlock_t *lock)
{
SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}
static void __spin_lock_debug(raw_spinlock_t *lock)
{
u64 i;
u64 loops = loops_per_jiffy * HZ;
for (i = 0; i < loops; i++) {
if (arch_spin_trylock(&lock->raw_lock))
return;
__delay(1);
}
/* lockup suspected: */
spin_dump(lock, "lockup suspected");
#ifdef CONFIG_SMP
trigger_all_cpu_backtrace();
#endif
/*
* The trylock above was causing a livelock. Give the lower level arch
* specific lock code a chance to acquire the lock. We have already
* printed a warning/backtrace at this point. The non-debug arch
* specific code might actually succeed in acquiring the lock. If it is
* not successful, the end-result is the same - there is no forward
* progress.
*/
arch_spin_lock(&lock->raw_lock);
}
void do_raw_spin_lock(raw_spinlock_t *lock)
{
debug_spin_lock_before(lock);
if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
__spin_lock_debug(lock);
debug_spin_lock_after(lock);
}
int do_raw_spin_trylock(raw_spinlock_t *lock)
{
int ret = arch_spin_trylock(&lock->raw_lock);
if (ret)
debug_spin_lock_after(lock);
#ifndef CONFIG_SMP
/*
* Must not happen on UP:
*/
SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
#endif
return ret;
}
void do_raw_spin_unlock(raw_spinlock_t *lock)
{
debug_spin_unlock(lock);
arch_spin_unlock(&lock->raw_lock);
}
static void rwlock_bug(rwlock_t *lock, const char *msg)
{
if (!debug_locks_off())
return;
printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
msg, raw_smp_processor_id(), current->comm,
task_pid_nr(current), lock);
dump_stack();
}
#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
static void __read_lock_debug(rwlock_t *lock)
{
u64 i;
u64 loops = loops_per_jiffy * HZ;
int print_once = 1;
for (;;) {
for (i = 0; i < loops; i++) {
if (arch_read_trylock(&lock->raw_lock))
return;
__delay(1);
}
/* lockup suspected: */
if (print_once) {
print_once = 0;
printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
"%s/%d, %p\n",
raw_smp_processor_id(), current->comm,
current->pid, lock);
dump_stack();
}
}
}
#endif
void do_raw_read_lock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
arch_read_lock(&lock->raw_lock);
}
int do_raw_read_trylock(rwlock_t *lock)
{
int ret = arch_read_trylock(&lock->raw_lock);
#ifndef CONFIG_SMP
/*
* Must not happen on UP:
*/
RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
#endif
return ret;
}
void do_raw_read_unlock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
arch_read_unlock(&lock->raw_lock);
}
static inline void debug_write_lock_before(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
lock, "cpu recursion");
}
static inline void debug_write_lock_after(rwlock_t *lock)
{
lock->owner_cpu = raw_smp_processor_id();
lock->owner = current;
}
static inline void debug_write_unlock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}
#if 0 /* This can cause lockups */
static void __write_lock_debug(rwlock_t *lock)
{
u64 i;
u64 loops = loops_per_jiffy * HZ;
int print_once = 1;
for (;;) {
for (i = 0; i < loops; i++) {
if (arch_write_trylock(&lock->raw_lock))
return;
__delay(1);
}
/* lockup suspected: */
if (print_once) {
print_once = 0;
printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
"%s/%d, %p\n",
raw_smp_processor_id(), current->comm,
current->pid, lock);
dump_stack();
}
}
}
#endif
void do_raw_write_lock(rwlock_t *lock)
{
debug_write_lock_before(lock);
arch_write_lock(&lock->raw_lock);
debug_write_lock_after(lock);
}
int do_raw_write_trylock(rwlock_t *lock)
{
int ret = arch_write_trylock(&lock->raw_lock);
if (ret)
debug_write_lock_after(lock);
#ifndef CONFIG_SMP
/*
* Must not happen on UP:
*/
RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
#endif
return ret;
}
void do_raw_write_unlock(rwlock_t *lock)
{
debug_write_unlock(lock);
arch_write_unlock(&lock->raw_lock);
}