Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

6
kernel/rcu/Makefile Normal file
View file

@ -0,0 +1,6 @@
obj-y += update.o srcu.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_TREE_RCU) += tree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o
obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
obj-$(CONFIG_TINY_RCU) += tiny.o

138
kernel/rcu/rcu.h Normal file
View file

@ -0,0 +1,138 @@
/*
* Read-Copy Update definitions shared among RCU implementations.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2011
*
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#ifndef __LINUX_RCU_H
#define __LINUX_RCU_H
#include <trace/events/rcu.h>
#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* Process-level increment to ->dynticks_nesting field. This allows for
* architectures that use half-interrupts and half-exceptions from
* process context.
*
* DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
* that counts the number of process-based reasons why RCU cannot
* consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
* is the value used to increment or decrement this field.
*
* The rest of the bits could in principle be used to count interrupts,
* but this would mean that a negative-one value in the interrupt
* field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
* We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
* that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
* The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
* initial exit from idle.
*/
#define DYNTICK_TASK_NEST_WIDTH 7
#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \
DYNTICK_TASK_FLAG)
/*
* debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
* by call_rcu() and rcu callback execution, and are therefore not part of the
* RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
*/
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
# define STATE_RCU_HEAD_READY 0
# define STATE_RCU_HEAD_QUEUED 1
extern struct debug_obj_descr rcuhead_debug_descr;
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
int r1;
r1 = debug_object_activate(head, &rcuhead_debug_descr);
debug_object_active_state(head, &rcuhead_debug_descr,
STATE_RCU_HEAD_READY,
STATE_RCU_HEAD_QUEUED);
return r1;
}
static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
debug_object_active_state(head, &rcuhead_debug_descr,
STATE_RCU_HEAD_QUEUED,
STATE_RCU_HEAD_READY);
debug_object_deactivate(head, &rcuhead_debug_descr);
}
#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
return 0;
}
static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
void kfree(const void *);
/*
* Reclaim the specified callback, either by invoking it (non-lazy case)
* or freeing it directly (lazy case). Return true if lazy, false otherwise.
*/
static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
{
unsigned long offset = (unsigned long)head->func;
rcu_lock_acquire(&rcu_callback_map);
if (__is_kfree_rcu_offset(offset)) {
RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
kfree((void *)head - offset);
rcu_lock_release(&rcu_callback_map);
return true;
} else {
RCU_TRACE(trace_rcu_invoke_callback(rn, head));
head->func(head);
rcu_lock_release(&rcu_callback_map);
return false;
}
}
#ifdef CONFIG_RCU_STALL_COMMON
extern int rcu_cpu_stall_suppress;
int rcu_jiffies_till_stall_check(void);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
/*
* Strings used in tracepoints need to be exported via the
* tracing system such that tools like perf and trace-cmd can
* translate the string address pointers to actual text.
*/
#define TPS(x) tracepoint_string(x)
#endif /* __LINUX_RCU_H */

1825
kernel/rcu/rcutorture.c Normal file

File diff suppressed because it is too large Load diff

693
kernel/rcu/srcu.c Normal file
View file

@ -0,0 +1,693 @@
/*
* Sleepable Read-Copy Update mechanism for mutual exclusion.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright (C) IBM Corporation, 2006
* Copyright (C) Fujitsu, 2012
*
* Author: Paul McKenney <paulmck@us.ibm.com>
* Lai Jiangshan <laijs@cn.fujitsu.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU/ *.txt
*
*/
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/srcu.h>
#include "rcu.h"
/*
* Initialize an rcu_batch structure to empty.
*/
static inline void rcu_batch_init(struct rcu_batch *b)
{
b->head = NULL;
b->tail = &b->head;
}
/*
* Enqueue a callback onto the tail of the specified rcu_batch structure.
*/
static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
{
*b->tail = head;
b->tail = &head->next;
}
/*
* Is the specified rcu_batch structure empty?
*/
static inline bool rcu_batch_empty(struct rcu_batch *b)
{
return b->tail == &b->head;
}
/*
* Remove the callback at the head of the specified rcu_batch structure
* and return a pointer to it, or return NULL if the structure is empty.
*/
static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
{
struct rcu_head *head;
if (rcu_batch_empty(b))
return NULL;
head = b->head;
b->head = head->next;
if (b->tail == &head->next)
rcu_batch_init(b);
return head;
}
/*
* Move all callbacks from the rcu_batch structure specified by "from" to
* the structure specified by "to".
*/
static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
{
if (!rcu_batch_empty(from)) {
*to->tail = from->head;
to->tail = from->tail;
rcu_batch_init(from);
}
}
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
sp->completed = 0;
spin_lock_init(&sp->queue_lock);
sp->running = false;
rcu_batch_init(&sp->batch_queue);
rcu_batch_init(&sp->batch_check0);
rcu_batch_init(&sp->batch_check1);
rcu_batch_init(&sp->batch_done);
INIT_DELAYED_WORK(&sp->work, process_srcu);
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
return sp->per_cpu_ref ? 0 : -ENOMEM;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int __init_srcu_struct(struct srcu_struct *sp, const char *name,
struct lock_class_key *key)
{
/* Don't re-initialize a lock while it is held. */
debug_check_no_locks_freed((void *)sp, sizeof(*sp));
lockdep_init_map(&sp->dep_map, name, key, 0);
return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/**
* init_srcu_struct - initialize a sleep-RCU structure
* @sp: structure to initialize.
*
* Must invoke this on a given srcu_struct before passing that srcu_struct
* to any other function. Each srcu_struct represents a separate domain
* of SRCU protection.
*/
int init_srcu_struct(struct srcu_struct *sp)
{
return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
* Returns approximate total of the readers' ->seq[] values for the
* rank of per-CPU counters specified by idx.
*/
static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
{
int cpu;
unsigned long sum = 0;
unsigned long t;
for_each_possible_cpu(cpu) {
t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
sum += t;
}
return sum;
}
/*
* Returns approximate number of readers active on the specified rank
* of the per-CPU ->c[] counters.
*/
static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
{
int cpu;
unsigned long sum = 0;
unsigned long t;
for_each_possible_cpu(cpu) {
t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
sum += t;
}
return sum;
}
/*
* Return true if the number of pre-existing readers is determined to
* be stably zero. An example unstable zero can occur if the call
* to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
* but due to task migration, sees the corresponding __srcu_read_unlock()
* decrement. This can happen because srcu_readers_active_idx() takes
* time to sum the array, and might in fact be interrupted or preempted
* partway through the summation.
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
{
unsigned long seq;
seq = srcu_readers_seq_idx(sp, idx);
/*
* The following smp_mb() A pairs with the smp_mb() B located in
* __srcu_read_lock(). This pairing ensures that if an
* __srcu_read_lock() increments its counter after the summation
* in srcu_readers_active_idx(), then the corresponding SRCU read-side
* critical section will see any changes made prior to the start
* of the current SRCU grace period.
*
* Also, if the above call to srcu_readers_seq_idx() saw the
* increment of ->seq[], then the call to srcu_readers_active_idx()
* must see the increment of ->c[].
*/
smp_mb(); /* A */
/*
* Note that srcu_readers_active_idx() can incorrectly return
* zero even though there is a pre-existing reader throughout.
* To see this, suppose that task A is in a very long SRCU
* read-side critical section that started on CPU 0, and that
* no other reader exists, so that the sum of the counters
* is equal to one. Then suppose that task B starts executing
* srcu_readers_active_idx(), summing up to CPU 1, and then that
* task C starts reading on CPU 0, so that its increment is not
* summed, but finishes reading on CPU 2, so that its decrement
* -is- summed. Then when task B completes its sum, it will
* incorrectly get zero, despite the fact that task A has been
* in its SRCU read-side critical section the whole time.
*
* We therefore do a validation step should srcu_readers_active_idx()
* return zero.
*/
if (srcu_readers_active_idx(sp, idx) != 0)
return false;
/*
* The remainder of this function is the validation step.
* The following smp_mb() D pairs with the smp_mb() C in
* __srcu_read_unlock(). If the __srcu_read_unlock() was seen
* by srcu_readers_active_idx() above, then any destructive
* operation performed after the grace period will happen after
* the corresponding SRCU read-side critical section.
*
* Note that there can be at most NR_CPUS worth of readers using
* the old index, which is not enough to overflow even a 32-bit
* integer. (Yes, this does mean that systems having more than
* a billion or so CPUs need to be 64-bit systems.) Therefore,
* the sum of the ->seq[] counters cannot possibly overflow.
* Therefore, the only way that the return values of the two
* calls to srcu_readers_seq_idx() can be equal is if there were
* no increments of the corresponding rank of ->seq[] counts
* in the interim. But the missed-increment scenario laid out
* above includes an increment of the ->seq[] counter by
* the corresponding __srcu_read_lock(). Therefore, if this
* scenario occurs, the return values from the two calls to
* srcu_readers_seq_idx() will differ, and thus the validation
* step below suffices.
*/
smp_mb(); /* D */
return srcu_readers_seq_idx(sp, idx) == seq;
}
/**
* srcu_readers_active - returns approximate number of readers.
* @sp: which srcu_struct to count active readers (holding srcu_read_lock).
*
* Note that this is not an atomic primitive, and can therefore suffer
* severe errors when invoked on an active srcu_struct. That said, it
* can be useful as an error check at cleanup time.
*/
static int srcu_readers_active(struct srcu_struct *sp)
{
int cpu;
unsigned long sum = 0;
for_each_possible_cpu(cpu) {
sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
}
return sum;
}
/**
* cleanup_srcu_struct - deconstruct a sleep-RCU structure
* @sp: structure to clean up.
*
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
void cleanup_srcu_struct(struct srcu_struct *sp)
{
if (WARN_ON(srcu_readers_active(sp)))
return; /* Leakage unless caller handles error. */
free_percpu(sp->per_cpu_ref);
sp->per_cpu_ref = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
int __srcu_read_lock(struct srcu_struct *sp)
{
int idx;
idx = ACCESS_ONCE(sp->completed) & 0x1;
preempt_disable();
__this_cpu_inc(sp->per_cpu_ref->c[idx]);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
preempt_enable();
return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);
/*
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
smp_mb(); /* C */ /* Avoid leaking the critical section. */
this_cpu_dec(sp->per_cpu_ref->c[idx]);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
* We use an adaptive strategy for synchronize_srcu() and especially for
* synchronize_srcu_expedited(). We spin for a fixed time period
* (defined below) to allow SRCU readers to exit their read-side critical
* sections. If there are still some readers after 10 microseconds,
* we repeatedly block for 1-millisecond time periods. This approach
* has done well in testing, so there is no need for a config parameter.
*/
#define SRCU_RETRY_CHECK_DELAY 5
#define SYNCHRONIZE_SRCU_TRYCOUNT 2
#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
/*
* @@@ Wait until all pre-existing readers complete. Such readers
* will have used the index specified by "idx".
* the caller should ensures the ->completed is not changed while checking
* and idx = (->completed & 1) ^ 1
*/
static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
{
for (;;) {
if (srcu_readers_active_idx_check(sp, idx))
return true;
if (--trycount <= 0)
return false;
udelay(SRCU_RETRY_CHECK_DELAY);
}
}
/*
* Increment the ->completed counter so that future SRCU readers will
* use the other rank of the ->c[] and ->seq[] arrays. This allows
* us to wait for pre-existing readers in a starvation-free manner.
*/
static void srcu_flip(struct srcu_struct *sp)
{
sp->completed++;
}
/*
* Enqueue an SRCU callback on the specified srcu_struct structure,
* initiating grace-period processing if it is not already running.
*
* Note that all CPUs must agree that the grace period extended beyond
* all pre-existing SRCU read-side critical section. On systems with
* more than one CPU, this means that when "func()" is invoked, each CPU
* is guaranteed to have executed a full memory barrier since the end of
* its last corresponding SRCU read-side critical section whose beginning
* preceded the call to call_rcu(). It also means that each CPU executing
* an SRCU read-side critical section that continues beyond the start of
* "func()" must have executed a memory barrier after the call_rcu()
* but before the beginning of that SRCU read-side critical section.
* Note that these guarantees include CPUs that are offline, idle, or
* executing in user mode, as well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
* resulting SRCU callback function "func()", then both CPU A and CPU
* B are guaranteed to execute a full memory barrier during the time
* interval between the call to call_rcu() and the invocation of "func()".
* This guarantee applies even if CPU A and CPU B are the same CPU (but
* again only if the system has more than one CPU).
*
* Of course, these guarantees apply only for invocations of call_srcu(),
* srcu_read_lock(), and srcu_read_unlock() that are all passed the same
* srcu_struct structure.
*/
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head))
{
unsigned long flags;
head->next = NULL;
head->func = func;
spin_lock_irqsave(&sp->queue_lock, flags);
rcu_batch_queue(&sp->batch_queue, head);
if (!sp->running) {
sp->running = true;
queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
}
spin_unlock_irqrestore(&sp->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(call_srcu);
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
};
/*
* Awaken the corresponding synchronize_srcu() instance now that a
* grace period has elapsed.
*/
static void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
static void srcu_reschedule(struct srcu_struct *sp);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
{
struct rcu_synchronize rcu;
struct rcu_head *head = &rcu.head;
bool done = false;
rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
!lock_is_held(&rcu_bh_lock_map) &&
!lock_is_held(&rcu_lock_map) &&
!lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
might_sleep();
init_completion(&rcu.completion);
head->next = NULL;
head->func = wakeme_after_rcu;
spin_lock_irq(&sp->queue_lock);
if (!sp->running) {
/* steal the processing owner */
sp->running = true;
rcu_batch_queue(&sp->batch_check0, head);
spin_unlock_irq(&sp->queue_lock);
srcu_advance_batches(sp, trycount);
if (!rcu_batch_empty(&sp->batch_done)) {
BUG_ON(sp->batch_done.head != head);
rcu_batch_dequeue(&sp->batch_done);
done = true;
}
/* give the processing owner to work_struct */
srcu_reschedule(sp);
} else {
rcu_batch_queue(&sp->batch_queue, head);
spin_unlock_irq(&sp->queue_lock);
}
if (!done)
wait_for_completion(&rcu.completion);
}
/**
* synchronize_srcu - wait for prior SRCU read-side critical-section completion
* @sp: srcu_struct with which to synchronize.
*
* Wait for the count to drain to zero of both indexes. To avoid the
* possible starvation of synchronize_srcu(), it waits for the count of
* the index=((->completed & 1) ^ 1) to drain to zero at first,
* and then flip the completed and wait for the count of the other index.
*
* Can block; must be called from process context.
*
* Note that it is illegal to call synchronize_srcu() from the corresponding
* SRCU read-side critical section; doing so will result in deadlock.
* However, it is perfectly legal to call synchronize_srcu() on one
* srcu_struct from some other srcu_struct's read-side critical section,
* as long as the resulting graph of srcu_structs is acyclic.
*
* There are memory-ordering constraints implied by synchronize_srcu().
* On systems with more than one CPU, when synchronize_srcu() returns,
* each CPU is guaranteed to have executed a full memory barrier since
* the end of its last corresponding SRCU-sched read-side critical section
* whose beginning preceded the call to synchronize_srcu(). In addition,
* each CPU having an SRCU read-side critical section that extends beyond
* the return from synchronize_srcu() is guaranteed to have executed a
* full memory barrier after the beginning of synchronize_srcu() and before
* the beginning of that SRCU read-side critical section. Note that these
* guarantees include CPUs that are offline, idle, or executing in user mode,
* as well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_srcu(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_srcu(). This guarantee applies even if CPU A and CPU B
* are the same CPU, but again only if the system has more than one CPU.
*
* Of course, these memory-ordering guarantees apply only when
* synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
* passed the same srcu_struct structure.
*/
void synchronize_srcu(struct srcu_struct *sp)
{
__synchronize_srcu(sp, rcu_expedited
? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
: SYNCHRONIZE_SRCU_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
/**
* synchronize_srcu_expedited - Brute-force SRCU grace period
* @sp: srcu_struct with which to synchronize.
*
* Wait for an SRCU grace period to elapse, but be more aggressive about
* spinning rather than blocking when waiting.
*
* Note that synchronize_srcu_expedited() has the same deadlock and
* memory-ordering properties as does synchronize_srcu().
*/
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
/**
* srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
* @sp: srcu_struct on which to wait for in-flight callbacks.
*/
void srcu_barrier(struct srcu_struct *sp)
{
synchronize_srcu(sp);
}
EXPORT_SYMBOL_GPL(srcu_barrier);
/**
* srcu_batches_completed - return batches completed.
* @sp: srcu_struct on which to report batch completion.
*
* Report the number of batches, correlated with, but not necessarily
* precisely the same as, the number of grace periods that have elapsed.
*/
long srcu_batches_completed(struct srcu_struct *sp)
{
return sp->completed;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);
#define SRCU_CALLBACK_BATCH 10
#define SRCU_INTERVAL 1
/*
* Move any new SRCU callbacks to the first stage of the SRCU grace
* period pipeline.
*/
static void srcu_collect_new(struct srcu_struct *sp)
{
if (!rcu_batch_empty(&sp->batch_queue)) {
spin_lock_irq(&sp->queue_lock);
rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
spin_unlock_irq(&sp->queue_lock);
}
}
/*
* Core SRCU state machine. Advance callbacks from ->batch_check0 to
* ->batch_check1 and then to ->batch_done as readers drain.
*/
static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
{
int idx = 1 ^ (sp->completed & 1);
/*
* Because readers might be delayed for an extended period after
* fetching ->completed for their index, at any point in time there
* might well be readers using both idx=0 and idx=1. We therefore
* need to wait for readers to clear from both index values before
* invoking a callback.
*/
if (rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_check1))
return; /* no callbacks need to be advanced */
if (!try_check_zero(sp, idx, trycount))
return; /* failed to advance, will try after SRCU_INTERVAL */
/*
* The callbacks in ->batch_check1 have already done with their
* first zero check and flip back when they were enqueued on
* ->batch_check0 in a previous invocation of srcu_advance_batches().
* (Presumably try_check_zero() returned false during that
* invocation, leaving the callbacks stranded on ->batch_check1.)
* They are therefore ready to invoke, so move them to ->batch_done.
*/
rcu_batch_move(&sp->batch_done, &sp->batch_check1);
if (rcu_batch_empty(&sp->batch_check0))
return; /* no callbacks need to be advanced */
srcu_flip(sp);
/*
* The callbacks in ->batch_check0 just finished their
* first check zero and flip, so move them to ->batch_check1
* for future checking on the other idx.
*/
rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
/*
* SRCU read-side critical sections are normally short, so check
* at least twice in quick succession after a flip.
*/
trycount = trycount < 2 ? 2 : trycount;
if (!try_check_zero(sp, idx^1, trycount))
return; /* failed to advance, will try after SRCU_INTERVAL */
/*
* The callbacks in ->batch_check1 have now waited for all
* pre-existing readers using both idx values. They are therefore
* ready to invoke, so move them to ->batch_done.
*/
rcu_batch_move(&sp->batch_done, &sp->batch_check1);
}
/*
* Invoke a limited number of SRCU callbacks that have passed through
* their grace period. If there are more to do, SRCU will reschedule
* the workqueue.
*/
static void srcu_invoke_callbacks(struct srcu_struct *sp)
{
int i;
struct rcu_head *head;
for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
head = rcu_batch_dequeue(&sp->batch_done);
if (!head)
break;
local_bh_disable();
head->func(head);
local_bh_enable();
}
}
/*
* Finished one round of SRCU grace period. Start another if there are
* more SRCU callbacks queued, otherwise put SRCU into not-running state.
*/
static void srcu_reschedule(struct srcu_struct *sp)
{
bool pending = true;
if (rcu_batch_empty(&sp->batch_done) &&
rcu_batch_empty(&sp->batch_check1) &&
rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_queue)) {
spin_lock_irq(&sp->queue_lock);
if (rcu_batch_empty(&sp->batch_done) &&
rcu_batch_empty(&sp->batch_check1) &&
rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_queue)) {
sp->running = false;
pending = false;
}
spin_unlock_irq(&sp->queue_lock);
}
if (pending)
queue_delayed_work(system_power_efficient_wq,
&sp->work, SRCU_INTERVAL);
}
/*
* This is the work-queue function that handles SRCU grace periods.
*/
void process_srcu(struct work_struct *work)
{
struct srcu_struct *sp;
sp = container_of(work, struct srcu_struct, work.work);
srcu_collect_new(sp);
srcu_advance_batches(sp, 1);
srcu_invoke_callbacks(sp);
srcu_reschedule(sp);
}
EXPORT_SYMBOL_GPL(process_srcu);

386
kernel/rcu/tiny.c Normal file
View file

@ -0,0 +1,386 @@
/*
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
*
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*/
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/cpu.h>
#include <linux/prefetch.h>
#include <linux/ftrace_event.h>
#include "rcu.h"
/* Forward declarations for tiny_plugin.h. */
struct rcu_ctrlblk;
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static void rcu_process_callbacks(struct softirq_action *unused);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
#include "tiny_plugin.h"
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
static void rcu_idle_enter_common(long long newval)
{
if (newval) {
RCU_TRACE(trace_rcu_dyntick(TPS("--="),
rcu_dynticks_nesting, newval));
rcu_dynticks_nesting = newval;
return;
}
RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
rcu_dynticks_nesting, newval));
if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
rcu_dynticks_nesting, newval));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
rcu_sched_qs(); /* implies rcu_bh_inc() */
barrier();
rcu_dynticks_nesting = newval;
}
/*
* Enter idle, which is an extended quiescent state if we have fully
* entered that mode (i.e., if the new value of dynticks_nesting is zero).
*/
void rcu_idle_enter(void)
{
unsigned long flags;
long long newval;
local_irq_save(flags);
WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
DYNTICK_TASK_NEST_VALUE)
newval = 0;
else
newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
rcu_idle_enter_common(newval);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
/*
* Exit an interrupt handler towards idle.
*/
void rcu_irq_exit(void)
{
unsigned long flags;
long long newval;
local_irq_save(flags);
newval = rcu_dynticks_nesting - 1;
WARN_ON_ONCE(newval < 0);
rcu_idle_enter_common(newval);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_exit);
/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
static void rcu_idle_exit_common(long long oldval)
{
if (oldval) {
RCU_TRACE(trace_rcu_dyntick(TPS("++="),
oldval, rcu_dynticks_nesting));
return;
}
RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
oldval, rcu_dynticks_nesting));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
}
/*
* Exit idle, so that we are no longer in an extended quiescent state.
*/
void rcu_idle_exit(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
WARN_ON_ONCE(rcu_dynticks_nesting < 0);
if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
else
rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
rcu_idle_exit_common(oldval);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);
/*
* Enter an interrupt handler, moving away from idle.
*/
void rcu_irq_enter(void)
{
unsigned long flags;
long long oldval;
local_irq_save(flags);
oldval = rcu_dynticks_nesting;
rcu_dynticks_nesting++;
WARN_ON_ONCE(rcu_dynticks_nesting == 0);
rcu_idle_exit_common(oldval);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_enter);
#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
/*
* Test whether RCU thinks that the current CPU is idle.
*/
bool notrace __rcu_is_watching(void)
{
return rcu_dynticks_nesting;
}
EXPORT_SYMBOL(__rcu_is_watching);
#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
/*
* Test whether the current CPU was interrupted from idle. Nested
* interrupts don't count, we must be running at the first interrupt
* level.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
return rcu_dynticks_nesting <= 1;
}
/*
* Helper function for rcu_sched_qs() and rcu_bh_qs().
* Also irqs are disabled to avoid confusion due to interrupt handlers
* invoking call_rcu().
*/
static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
{
RCU_TRACE(reset_cpu_stall_ticks(rcp));
if (rcp->rcucblist != NULL &&
rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
return 1;
}
return 0;
}
/*
* Record an rcu quiescent state. And an rcu_bh quiescent state while we
* are at it, given that any rcu quiescent state is also an rcu_bh
* quiescent state. Use "+" instead of "||" to defeat short circuiting.
*/
void rcu_sched_qs(void)
{
unsigned long flags;
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
local_irq_restore(flags);
}
/*
* Record an rcu_bh quiescent state.
*/
void rcu_bh_qs(void)
{
unsigned long flags;
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
local_irq_restore(flags);
}
/*
* Check to see if the scheduling-clock interrupt came from an extended
* quiescent state, and, if so, tell RCU about it. This function must
* be called from hardirq context. It is normally called from the
* scheduling-clock interrupt.
*/
void rcu_check_callbacks(int cpu, int user)
{
RCU_TRACE(check_cpu_stalls());
if (user || rcu_is_cpu_rrupt_from_idle())
rcu_sched_qs();
else if (!in_softirq())
rcu_bh_qs();
if (user)
rcu_note_voluntary_context_switch(current);
}
/*
* Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
* whose grace period has elapsed.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
const char *rn = NULL;
struct rcu_head *next, *list;
unsigned long flags;
RCU_TRACE(int cb_count = 0);
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail) {
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
!!ACCESS_ONCE(rcp->rcucblist),
need_resched(),
is_idle_task(current),
false));
return;
}
/* Move the ready-to-invoke callbacks to a local list. */
local_irq_save(flags);
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
list = rcp->rcucblist;
rcp->rcucblist = *rcp->donetail;
*rcp->donetail = NULL;
if (rcp->curtail == rcp->donetail)
rcp->curtail = &rcp->rcucblist;
rcp->donetail = &rcp->rcucblist;
local_irq_restore(flags);
/* Invoke the callbacks on the local list. */
RCU_TRACE(rn = rcp->name);
while (list) {
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
local_bh_disable();
__rcu_reclaim(rn, list);
local_bh_enable();
list = next;
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
RCU_TRACE(trace_rcu_batch_end(rcp->name,
cb_count, 0, need_resched(),
is_idle_task(current),
false));
}
static void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
}
/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_sched() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_sched() is a quiescent
* state, and so on a UP system, synchronize_sched() need do nothing.
* Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the
* benefits of doing might_sleep() to reduce latency.)
*
* Cool, huh? (Due to Josh Triplett.)
*
* But we want to make this a static inline later. The cond_resched()
* currently makes this problematic.
*/
void synchronize_sched(void)
{
rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
!lock_is_held(&rcu_lock_map) &&
!lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched() in RCU read-side critical section");
cond_resched();
}
EXPORT_SYMBOL_GPL(synchronize_sched);
/*
* Helper function for call_rcu() and call_rcu_bh().
*/
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp)
{
unsigned long flags;
debug_rcu_head_queue(head);
head->func = func;
head->next = NULL;
local_irq_save(flags);
*rcp->curtail = head;
rcp->curtail = &head->next;
RCU_TRACE(rcp->qlen++);
local_irq_restore(flags);
}
/*
* Post an RCU callback to be invoked after the end of an RCU-sched grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
* quiescent state.
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
void rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

174
kernel/rcu/tiny_plugin.h Normal file
View file

@ -0,0 +1,174 @@
/*
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
* Internal non-public definitions that provide either classic
* or preemptible semantics.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright (c) 2010 Linaro
*
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
RCU_TRACE(long qlen); /* Number of pending CBs. */
RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
RCU_TRACE(const char *name); /* Name of RCU type. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
RCU_TRACE(.name = "rcu_sched")
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
RCU_TRACE(.name = "rcu_bh")
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#include <linux/kernel_stat.h>
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
/*
* During boot, we forgive RCU lockdep issues. After this function is
* invoked, we start taking RCU lockdep issues seriously.
*/
void __init rcu_scheduler_starting(void)
{
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_RCU_TRACE
static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
{
unsigned long flags;
local_irq_save(flags);
rcp->qlen -= n;
local_irq_restore(flags);
}
/*
* Dump statistics for TINY_RCU, such as they are.
*/
static int show_tiny_stats(struct seq_file *m, void *unused)
{
seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
return 0;
}
static int show_tiny_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, show_tiny_stats, NULL);
}
static const struct file_operations show_tiny_stats_fops = {
.owner = THIS_MODULE,
.open = show_tiny_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *rcudir;
static int __init rcutiny_trace_init(void)
{
struct dentry *retval;
rcudir = debugfs_create_dir("rcu", NULL);
if (!rcudir)
goto free_out;
retval = debugfs_create_file("rcudata", 0444, rcudir,
NULL, &show_tiny_stats_fops);
if (!retval)
goto free_out;
return 0;
free_out:
debugfs_remove_recursive(rcudir);
return 1;
}
static void __exit rcutiny_trace_cleanup(void)
{
debugfs_remove_recursive(rcudir);
}
module_init(rcutiny_trace_init);
module_exit(rcutiny_trace_cleanup);
MODULE_AUTHOR("Paul E. McKenney");
MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
MODULE_LICENSE("GPL");
static void check_cpu_stall(struct rcu_ctrlblk *rcp)
{
unsigned long j;
unsigned long js;
if (rcu_cpu_stall_suppress)
return;
rcp->ticks_this_gp++;
j = jiffies;
js = ACCESS_ONCE(rcp->jiffies_stall);
if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
jiffies - rcp->gp_start, rcp->qlen);
dump_stack();
}
if (*rcp->curtail && ULONG_CMP_GE(j, js))
ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
3 * rcu_jiffies_till_stall_check() + 3;
else if (ULONG_CMP_GE(j, js))
ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
}
static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
{
rcp->ticks_this_gp = 0;
rcp->gp_start = jiffies;
ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
}
static void check_cpu_stalls(void)
{
RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
}
#endif /* #ifdef CONFIG_RCU_TRACE */

3773
kernel/rcu/tree.c Normal file

File diff suppressed because it is too large Load diff

645
kernel/rcu/tree.h Normal file
View file

@ -0,0 +1,645 @@
/*
* Read-Copy Update mechanism for mutual exclusion (tree-based version)
* Internal non-public definitions.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
*
* Author: Ingo Molnar <mingo@elte.hu>
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>
#include <linux/irq_work.h>
/*
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
* CONFIG_RCU_FANOUT_LEAF.
* In theory, it should be possible to add more levels straightforwardly.
* In practice, this did work well going from three levels to four.
* Of course, your mileage may vary.
*/
#define MAX_RCU_LVLS 4
#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT_1
# define RCU_NUM_LVLS 1
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_2
# define RCU_NUM_LVLS 2
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_3
# define RCU_NUM_LVLS 3
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_3 (NR_CPUS)
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_4
# define RCU_NUM_LVLS 4
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_4 (NR_CPUS)
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
extern int rcu_num_lvls;
extern int rcu_num_nodes;
/*
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
long long dynticks_nesting; /* Track irq/process nesting level. */
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
long long dynticks_idle_nesting;
/* irq/process nesting level from idle. */
atomic_t dynticks_idle; /* Even value for idle, else odd. */
/* "Idle" excludes userspace execution. */
unsigned long dynticks_idle_jiffies;
/* End of last non-NMI non-idle period. */
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
#ifdef CONFIG_RCU_FAST_NO_HZ
bool all_lazy; /* Are all CPU's CBs lazy? */
unsigned long nonlazy_posted;
/* # times non-lazy CBs posted to CPU. */
unsigned long nonlazy_posted_snap;
/* idle-period nonlazy_posted snapshot. */
unsigned long last_accelerate;
/* Last jiffy CBs were accelerated. */
unsigned long last_advance_all;
/* Last jiffy CBs were all advanced. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
/* RCU's kthread states for tracing. */
#define RCU_KTHREAD_STOPPED 0
#define RCU_KTHREAD_RUNNING 1
#define RCU_KTHREAD_WAITING 2
#define RCU_KTHREAD_OFFCPU 3
#define RCU_KTHREAD_YIELDING 4
#define RCU_KTHREAD_MAX 4
/*
* Definition for node within the RCU grace-period-detection hierarchy.
*/
struct rcu_node {
raw_spinlock_t lock; /* Root rcu_node's lock protects some */
/* rcu_state fields as well as following. */
unsigned long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long completed; /* Last GP completed for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long qsmask; /* CPUs or groups that need to switch in */
/* order for current grace period to proceed.*/
/* In leaf rcu_node, each bit corresponds to */
/* an rcu_data structure, otherwise, each */
/* bit corresponds to a child rcu_node */
/* structure. */
unsigned long expmask; /* Groups that have ->blkd_tasks */
/* elements that need to drain to allow the */
/* current expedited grace period to */
/* complete (only for TREE_PREEMPT_RCU). */
unsigned long qsmaskinit;
/* Per-GP initial value for qsmask & expmask. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
/* Only one bit will be set in this mask. */
int grplo; /* lowest-numbered CPU or group here. */
int grphi; /* highest-numbered CPU or group here. */
u8 grpnum; /* CPU/group number for next level up. */
u8 level; /* root is at level 0. */
struct rcu_node *parent;
struct list_head blkd_tasks;
/* Tasks blocked in RCU read-side critical */
/* section. Tasks are placed at the head */
/* of this list and age towards the tail. */
struct list_head *gp_tasks;
/* Pointer to the first task blocking the */
/* current grace period, or NULL if there */
/* is no such task. */
struct list_head *exp_tasks;
/* Pointer to the first task blocking the */
/* current expedited grace period, or NULL */
/* if there is no such task. If there */
/* is no current expedited grace period, */
/* then there can cannot be any such task. */
#ifdef CONFIG_RCU_BOOST
struct list_head *boost_tasks;
/* Pointer to first task that needs to be */
/* priority boosted, or NULL if no priority */
/* boosting is needed for this rcu_node */
/* structure. If there are no tasks */
/* queued on this rcu_node structure that */
/* are blocking the current grace period, */
/* there can be no such task. */
struct completion boost_completion;
/* Used to ensure that the rt_mutex used */
/* to carry out the boosting is fully */
/* released with no future boostee accesses */
/* before that rt_mutex is re-initialized. */
struct rt_mutex boost_mtx;
/* Used only for the priority-boosting */
/* side effect, not as a lock. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
struct task_struct *boost_kthread_task;
/* kthread that takes care of priority */
/* boosting for this rcu_node structure. */
unsigned int boost_kthread_status;
/* State of boost_kthread_task for tracing. */
unsigned long n_tasks_boosted;
/* Total number of tasks boosted. */
unsigned long n_exp_boosts;
/* Number of tasks boosted for expedited GP. */
unsigned long n_normal_boosts;
/* Number of tasks boosted for normal GP. */
unsigned long n_balk_blkd_tasks;
/* Refused to boost: no blocked tasks. */
unsigned long n_balk_exp_gp_tasks;
/* Refused to boost: nothing blocking GP. */
unsigned long n_balk_boost_tasks;
/* Refused to boost: already boosting. */
unsigned long n_balk_notblocked;
/* Refused to boost: RCU RS CS still running. */
unsigned long n_balk_notyet;
/* Refused to boost: not yet time. */
unsigned long n_balk_nos;
/* Refused to boost: not sure why, though. */
/* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
wait_queue_head_t nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
/* Counts of upcoming no-CB GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
} ____cacheline_internodealigned_in_smp;
/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.
*/
#define rcu_for_each_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/*
* Do a breadth-first scan of the non-leaf rcu_node structures for the
* specified rcu_state structure. Note that if there is a singleton
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
* structure. Note that if there is a singleton rcu_node tree with but
* one rcu_node structure, this loop -will- visit the rcu_node structure.
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/* Index values for nxttail array in struct rcu_data. */
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
#define RCU_NEXT_TAIL 3
#define RCU_NEXT_SIZE 4
/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
unsigned long completed; /* Track rsp->completed gp number */
/* in order to detect GP end. */
unsigned long gpnum; /* Highest gp number that this CPU */
/* is aware of having started. */
bool passed_quiesce; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
#ifdef CONFIG_RCU_CPU_STALL_INFO
unsigned long ticks_this_gp; /* The number of scheduling-clock */
/* ticks this CPU has handled */
/* during and after the last grace */
/* period it is aware of. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
/* 2) batch handling */
/*
* If nxtlist is not NULL, it is partitioned as follows.
* Any of the partitions might be empty, in which case the
* pointer to that partition will be equal to the pointer for
* the following partition. When the list is empty, all of
* the nxttail elements point to the ->nxtlist pointer itself,
* which in that case is NULL.
*
* [nxtlist, *nxttail[RCU_DONE_TAIL]):
* Entries that batch # <= ->completed
* The grace period for these entries has completed, and
* the other grace-period-completed entries may be moved
* here temporarily in rcu_process_callbacks().
* [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
* Entries that batch # <= ->completed - 1: waiting for current GP
* [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
* Entries known to have arrived before current GP ended
* [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
* Entries that might have arrived after current GP ended
* Note that the value of *nxttail[RCU_NEXT_TAIL] will
* always be NULL, as this is the end of the list.
*/
struct rcu_head *nxtlist;
struct rcu_head **nxttail[RCU_NEXT_SIZE];
unsigned long nxtcompleted[RCU_NEXT_SIZE];
/* grace periods for sublists. */
long qlen_lazy; /* # of lazy queued callbacks */
long qlen; /* # of queued callbacks, incl lazy */
long qlen_last_fqs_check;
/* qlen at last check for QS forcing */
unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
unsigned long n_force_qs_snap;
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */
/* 3) dynticks interface. */
struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
int dynticks_snap; /* Per-GP tracking for dynticks. */
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
unsigned long offline_fqs; /* Kicked due to being offline. */
unsigned long cond_resched_completed;
/* Grace period that needs help */
/* from cond_resched(). */
/* 5) __rcu_pending() statistics. */
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
unsigned long n_rp_qs_pending;
unsigned long n_rp_report_qs;
unsigned long n_rp_cb_ready;
unsigned long n_rp_cpu_needs_gp;
unsigned long n_rp_gp_completed;
unsigned long n_rp_gp_started;
unsigned long n_rp_nocb_defer_wakeup;
unsigned long n_rp_need_nothing;
/* 6) _rcu_barrier() and OOM callbacks. */
struct rcu_head barrier_head;
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
struct rcu_head *nocb_head; /* CBs waiting for kthread. */
struct rcu_head **nocb_tail;
atomic_long_t nocb_q_count; /* # CBs waiting for kthread */
atomic_long_t nocb_q_count_lazy; /* (approximate). */
struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
struct rcu_head **nocb_follower_tail;
atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */
atomic_long_t nocb_follower_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
/* The following fields are used by the leader, hence own cacheline. */
struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
/* CBs waiting for GP. */
struct rcu_head **nocb_gp_tail;
long nocb_gp_count;
long nocb_gp_count_lazy;
bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */
struct rcu_data *nocb_next_follower;
/* Next follower in wakeup chain. */
/* The following fields are used by the follower, hence new cachline. */
struct rcu_data *nocb_leader ____cacheline_internodealigned_in_smp;
/* Leader CPU takes GP-end wakeups. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* 8) RCU CPU stall data. */
#ifdef CONFIG_RCU_CPU_STALL_INFO
unsigned int softirq_snap; /* Snapshot of softirq activity. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
int cpu;
struct rcu_state *rsp;
};
/* Values for fqs_state field in struct rcu_state. */
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
/* Values for nocb_defer_wakeup field in struct rcu_data. */
#define RCU_NOGP_WAKE_NOT 0
#define RCU_NOGP_WAKE 1
#define RCU_NOGP_WAKE_FORCE 2
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
/* and jiffies_till_next_fqs. */
#define RCU_JIFFIES_FQS_DIV 256 /* Very large systems need more */
/* delay between bouts of */
/* quiescent-state forcing. */
#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time to take */
/* at least one scheduling clock */
/* irq before ratting on them. */
#define rcu_wait(cond) \
do { \
for (;;) { \
set_current_state(TASK_INTERRUPTIBLE); \
if (cond) \
break; \
schedule(); \
} \
__set_current_state(TASK_RUNNING); \
} while (0)
/*
* RCU global state, including node hierarchy. This hierarchy is
* represented in "heap" form in a dense array. The root (first level)
* of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
* level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
* and the third level in ->node[m+1] and following (->node[m+1] referenced
* by ->level[2]). The number of levels is determined by the number of
* CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
* consisting of a single rcu_node.
*/
struct rcu_state {
struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
u8 flavor_mask; /* bit in flavor mask. */
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head));
/* The following fields are guarded by the root rcu_node's lock. */
u8 fqs_state ____cacheline_internodealigned_in_smp;
/* Force QS state. */
u8 boost; /* Subject to priority boost. */
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
wait_queue_head_t gp_wq; /* Where GP task waits. */
short gp_flags; /* Commands for GP task. */
short gp_state; /* GP kthread sleep state. */
/* End of fields guarded by root rcu_node's lock. */
raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
/* Protect following fields. */
struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */
/* need a grace period. */
struct rcu_head **orphan_nxttail; /* Tail of above. */
struct rcu_head *orphan_donelist; /* Orphaned callbacks that */
/* are ready to invoke. */
struct rcu_head **orphan_donetail; /* Tail of above. */
long qlen_lazy; /* Number of lazy callbacks. */
long qlen; /* Total number of callbacks. */
/* End of fields guarded by orphan_lock. */
struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
struct mutex barrier_mutex; /* Guards barrier fields. */
atomic_t barrier_cpu_count; /* # CPUs waiting on. */
struct completion barrier_completion; /* Wake at barrier end. */
unsigned long n_barrier_done; /* ++ at start and end of */
/* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */
atomic_long_t expedited_start; /* Starting ticket. */
atomic_long_t expedited_done; /* Done ticket. */
atomic_long_t expedited_wrap; /* # near-wrap incidents. */
atomic_long_t expedited_tryfail; /* # acquisition failures. */
atomic_long_t expedited_workdone1; /* # done by others #1. */
atomic_long_t expedited_workdone2; /* # done by others #2. */
atomic_long_t expedited_normal; /* # fallbacks to normal. */
atomic_long_t expedited_stoppedcpus; /* # successful stop_cpus. */
atomic_long_t expedited_done_tries; /* # tries to update _done. */
atomic_long_t expedited_done_lost; /* # times beaten to _done. */
atomic_long_t expedited_done_exit; /* # times exited _done loop. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
/* force_quiescent_state(). */
unsigned long n_force_qs_lh; /* ~Number of calls leaving */
/* due to lock unavailable. */
unsigned long n_force_qs_ngp; /* Number of calls leaving */
/* due to no GP active. */
unsigned long gp_start; /* Time at which GP started, */
/* but in jiffies. */
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
unsigned long jiffies_resched; /* Time at which to resched */
/* a reluctant CPU. */
unsigned long gp_max; /* Maximum GP duration in */
/* jiffies. */
const char *name; /* Name of structure. */
char abbr; /* Abbreviated name. */
struct list_head flavors; /* List of RCU flavors. */
};
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_WAIT_INIT 0 /* Initial state. */
#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */
extern struct list_head rcu_struct_flavors;
/* Sequence through rcu_state structures for each RCU flavor. */
#define for_each_rcu_flavor(rsp) \
list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
/* Return values for rcu_preempt_offline_tasks(). */
#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
/* GP were moved to root. */
#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
/* GP were moved to root. */
/*
* RCU implementation internal declarations:
*/
extern struct rcu_state rcu_sched_state;
DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
extern struct rcu_state rcu_bh_state;
DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
#ifdef CONFIG_TREE_PREEMPT_RCU
extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifndef RCU_TREE_NONCORE
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
long rcu_batches_completed(void);
static void rcu_preempt_note_context_switch(int cpu);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
unsigned long flags);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static int rcu_print_task_stall(struct rcu_node *rnp);
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp,
struct rcu_data *rdp);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_preempt_check_callbacks(int cpu);
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake);
#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(int cpu);
static void rcu_prepare_for_idle(int cpu);
static void rcu_idle_count_callbacks_posted(void);
static void print_cpu_stall_info_begin(void);
static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy, unsigned long flags);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_all_nocb_kthreads(int cpu);
static void __init rcu_spawn_nocb_kthreads(void);
#ifdef CONFIG_RCU_NOCB_CPU
static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
static bool init_nocb_callback_list(struct rcu_data *rdp);
static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
unsigned long *maxj);
static bool is_sysidle_rcu_state(struct rcu_state *rsp);
static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
unsigned long maxj);
static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
static void rcu_dynticks_task_enter(void);
static void rcu_dynticks_task_exit(void);
#endif /* #ifndef RCU_TREE_NONCORE */
#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_NOCB_CPU
/* Sum up queue lengths for tracing. */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = atomic_long_read(&rdp->nocb_q_count) +
rdp->nocb_p_count +
atomic_long_read(&rdp->nocb_follower_count) +
rdp->nocb_p_count + rdp->nocb_gp_count;
*qll = atomic_long_read(&rdp->nocb_q_count_lazy) +
rdp->nocb_p_count_lazy +
atomic_long_read(&rdp->nocb_follower_count_lazy) +
rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy;
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = 0;
*qll = 0;
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
#endif /* #ifdef CONFIG_RCU_TRACE */

3214
kernel/rcu/tree_plugin.h Normal file

File diff suppressed because it is too large Load diff

501
kernel/rcu/tree_trace.c Normal file
View file

@ -0,0 +1,501 @@
/*
* Read-Copy Update tracing for classic implementation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
*
* Papers: http://www.rdrop.com/users/paulmck/RCU
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#define RCU_TREE_NONCORE
#include "tree.h"
static int r_open(struct inode *inode, struct file *file,
const struct seq_operations *op)
{
int ret = seq_open(file, op);
if (!ret) {
struct seq_file *m = (struct seq_file *)file->private_data;
m->private = inode->i_private;
}
return ret;
}
static void *r_start(struct seq_file *m, loff_t *pos)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
*pos = cpumask_next(*pos - 1, cpu_possible_mask);
if ((*pos) < nr_cpu_ids)
return per_cpu_ptr(rsp->rda, *pos);
return NULL;
}
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return r_start(m, pos);
}
static void r_stop(struct seq_file *m, void *v)
{
}
static int show_rcubarrier(struct seq_file *m, void *v)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
seq_printf(m, "bcc: %d nbd: %lu\n",
atomic_read(&rsp->barrier_cpu_count),
rsp->n_barrier_done);
return 0;
}
static int rcubarrier_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcubarrier, inode->i_private);
}
static const struct file_operations rcubarrier_fops = {
.owner = THIS_MODULE,
.open = rcubarrier_open,
.read = seq_read,
.llseek = no_llseek,
.release = single_release,
};
#ifdef CONFIG_RCU_BOOST
static char convert_kthread_status(unsigned int kthread_status)
{
if (kthread_status > RCU_KTHREAD_MAX)
return '?';
return "SRWOY"[kthread_status];
}
#endif /* #ifdef CONFIG_RCU_BOOST */
static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
long ql, qll;
if (!rdp->beenonline)
return;
seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
ulong2long(rdp->completed), ulong2long(rdp->gpnum),
rdp->passed_quiesce, rdp->qs_pending);
seq_printf(m, " dt=%d/%llx/%d df=%lu",
atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
seq_printf(m, " of=%lu", rdp->offline_fqs);
rcu_nocb_q_lengths(rdp, &ql, &qll);
qll += rdp->qlen_lazy;
ql += rdp->qlen;
seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
qll, ql,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
rdp->nxttail[RCU_NEXT_TAIL]],
".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
rdp->nxttail[RCU_NEXT_READY_TAIL]],
".W"[rdp->nxttail[RCU_DONE_TAIL] !=
rdp->nxttail[RCU_WAIT_TAIL]],
".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
#ifdef CONFIG_RCU_BOOST
seq_printf(m, " kt=%d/%c ktl=%x",
per_cpu(rcu_cpu_has_work, rdp->cpu),
convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
rdp->cpu)),
per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
#endif /* #ifdef CONFIG_RCU_BOOST */
seq_printf(m, " b=%ld", rdp->blimit);
seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
}
static int show_rcudata(struct seq_file *m, void *v)
{
print_one_rcu_data(m, (struct rcu_data *)v);
return 0;
}
static const struct seq_operations rcudate_op = {
.start = r_start,
.next = r_next,
.stop = r_stop,
.show = show_rcudata,
};
static int rcudata_open(struct inode *inode, struct file *file)
{
return r_open(inode, file, &rcudate_op);
}
static const struct file_operations rcudata_fops = {
.owner = THIS_MODULE,
.open = rcudata_open,
.read = seq_read,
.llseek = no_llseek,
.release = seq_release,
};
static int show_rcuexp(struct seq_file *m, void *v)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
atomic_long_read(&rsp->expedited_start),
atomic_long_read(&rsp->expedited_done),
atomic_long_read(&rsp->expedited_wrap),
atomic_long_read(&rsp->expedited_tryfail),
atomic_long_read(&rsp->expedited_workdone1),
atomic_long_read(&rsp->expedited_workdone2),
atomic_long_read(&rsp->expedited_normal),
atomic_long_read(&rsp->expedited_stoppedcpus),
atomic_long_read(&rsp->expedited_done_tries),
atomic_long_read(&rsp->expedited_done_lost),
atomic_long_read(&rsp->expedited_done_exit));
return 0;
}
static int rcuexp_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcuexp, inode->i_private);
}
static const struct file_operations rcuexp_fops = {
.owner = THIS_MODULE,
.open = rcuexp_open,
.read = seq_read,
.llseek = no_llseek,
.release = single_release,
};
#ifdef CONFIG_RCU_BOOST
static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
{
seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
rnp->grplo, rnp->grphi,
"T."[list_empty(&rnp->blkd_tasks)],
"N."[!rnp->gp_tasks],
"E."[!rnp->exp_tasks],
"B."[!rnp->boost_tasks],
convert_kthread_status(rnp->boost_kthread_status),
rnp->n_tasks_boosted, rnp->n_exp_boosts,
rnp->n_normal_boosts);
seq_printf(m, "j=%04x bt=%04x\n",
(int)(jiffies & 0xffff),
(int)(rnp->boost_time & 0xffff));
seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
rnp->n_balk_blkd_tasks,
rnp->n_balk_exp_gp_tasks,
rnp->n_balk_boost_tasks,
rnp->n_balk_notblocked,
rnp->n_balk_notyet,
rnp->n_balk_nos);
}
static int show_rcu_node_boost(struct seq_file *m, void *unused)
{
struct rcu_node *rnp;
rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
print_one_rcu_node_boost(m, rnp);
return 0;
}
static int rcu_node_boost_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcu_node_boost, NULL);
}
static const struct file_operations rcu_node_boost_fops = {
.owner = THIS_MODULE,
.open = rcu_node_boost_open,
.read = seq_read,
.llseek = no_llseek,
.release = single_release,
};
#endif /* #ifdef CONFIG_RCU_BOOST */
static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
unsigned long gpnum;
int level = 0;
struct rcu_node *rnp;
gpnum = rsp->gpnum;
seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
ulong2long(rsp->completed), ulong2long(gpnum),
rsp->fqs_state,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff));
seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
rsp->n_force_qs, rsp->n_force_qs_ngp,
rsp->n_force_qs - rsp->n_force_qs_ngp,
ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
if (rnp->level != level) {
seq_puts(m, "\n");
level = rnp->level;
}
seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ",
rnp->qsmask, rnp->qsmaskinit,
".G"[rnp->gp_tasks != NULL],
".E"[rnp->exp_tasks != NULL],
".T"[!list_empty(&rnp->blkd_tasks)],
rnp->grplo, rnp->grphi, rnp->grpnum);
}
seq_puts(m, "\n");
}
static int show_rcuhier(struct seq_file *m, void *v)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
print_one_rcu_state(m, rsp);
return 0;
}
static int rcuhier_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcuhier, inode->i_private);
}
static const struct file_operations rcuhier_fops = {
.owner = THIS_MODULE,
.open = rcuhier_open,
.read = seq_read,
.llseek = no_llseek,
.release = single_release,
};
static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
{
unsigned long flags;
unsigned long completed;
unsigned long gpnum;
unsigned long gpage;
unsigned long gpmax;
struct rcu_node *rnp = &rsp->node[0];
raw_spin_lock_irqsave(&rnp->lock, flags);
completed = ACCESS_ONCE(rsp->completed);
gpnum = ACCESS_ONCE(rsp->gpnum);
if (completed == gpnum)
gpage = 0;
else
gpage = jiffies - rsp->gp_start;
gpmax = rsp->gp_max;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
seq_printf(m, "completed=%ld gpnum=%ld age=%ld max=%ld\n",
ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
}
static int show_rcugp(struct seq_file *m, void *v)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
show_one_rcugp(m, rsp);
return 0;
}
static int rcugp_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcugp, inode->i_private);
}
static const struct file_operations rcugp_fops = {
.owner = THIS_MODULE,
.open = rcugp_open,
.read = seq_read,
.llseek = no_llseek,
.release = single_release,
};
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
seq_printf(m, "%3d%cnp=%ld ",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->n_rcu_pending);
seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
rdp->n_rp_qs_pending,
rdp->n_rp_report_qs,
rdp->n_rp_cb_ready,
rdp->n_rp_cpu_needs_gp);
seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
rdp->n_rp_gp_completed,
rdp->n_rp_gp_started,
rdp->n_rp_nocb_defer_wakeup,
rdp->n_rp_need_nothing);
}
static int show_rcu_pending(struct seq_file *m, void *v)
{
print_one_rcu_pending(m, (struct rcu_data *)v);
return 0;
}
static const struct seq_operations rcu_pending_op = {
.start = r_start,
.next = r_next,
.stop = r_stop,
.show = show_rcu_pending,
};
static int rcu_pending_open(struct inode *inode, struct file *file)
{
return r_open(inode, file, &rcu_pending_op);
}
static const struct file_operations rcu_pending_fops = {
.owner = THIS_MODULE,
.open = rcu_pending_open,
.read = seq_read,
.llseek = no_llseek,
.release = seq_release,
};
static int show_rcutorture(struct seq_file *m, void *unused)
{
seq_printf(m, "rcutorture test sequence: %lu %s\n",
rcutorture_testseq >> 1,
(rcutorture_testseq & 0x1) ? "(test in progress)" : "");
seq_printf(m, "rcutorture update version number: %lu\n",
rcutorture_vernum);
return 0;
}
static int rcutorture_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcutorture, NULL);
}
static const struct file_operations rcutorture_fops = {
.owner = THIS_MODULE,
.open = rcutorture_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *rcudir;
static int __init rcutree_trace_init(void)
{
struct rcu_state *rsp;
struct dentry *retval;
struct dentry *rspdir;
rcudir = debugfs_create_dir("rcu", NULL);
if (!rcudir)
goto free_out;
for_each_rcu_flavor(rsp) {
rspdir = debugfs_create_dir(rsp->name, rcudir);
if (!rspdir)
goto free_out;
retval = debugfs_create_file("rcudata", 0444,
rspdir, rsp, &rcudata_fops);
if (!retval)
goto free_out;
retval = debugfs_create_file("rcuexp", 0444,
rspdir, rsp, &rcuexp_fops);
if (!retval)
goto free_out;
retval = debugfs_create_file("rcu_pending", 0444,
rspdir, rsp, &rcu_pending_fops);
if (!retval)
goto free_out;
retval = debugfs_create_file("rcubarrier", 0444,
rspdir, rsp, &rcubarrier_fops);
if (!retval)
goto free_out;
#ifdef CONFIG_RCU_BOOST
if (rsp == &rcu_preempt_state) {
retval = debugfs_create_file("rcuboost", 0444,
rspdir, NULL, &rcu_node_boost_fops);
if (!retval)
goto free_out;
}
#endif
retval = debugfs_create_file("rcugp", 0444,
rspdir, rsp, &rcugp_fops);
if (!retval)
goto free_out;
retval = debugfs_create_file("rcuhier", 0444,
rspdir, rsp, &rcuhier_fops);
if (!retval)
goto free_out;
}
retval = debugfs_create_file("rcutorture", 0444, rcudir,
NULL, &rcutorture_fops);
if (!retval)
goto free_out;
return 0;
free_out:
debugfs_remove_recursive(rcudir);
return 1;
}
static void __exit rcutree_trace_cleanup(void)
{
debugfs_remove_recursive(rcudir);
}
module_init(rcutree_trace_init);
module_exit(rcutree_trace_cleanup);
MODULE_AUTHOR("Paul E. McKenney");
MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
MODULE_LICENSE("GPL");

692
kernel/rcu/update.c Normal file
View file

@ -0,0 +1,692 @@
/*
* Read-Copy Update mechanism for mutual exclusion
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2001
*
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
* Manfred Spraul <manfred@colorfullife.com>
*
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
* Papers:
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
*
* For detailed explanation of Read-Copy Update mechanism see -
* http://lse.sourceforge.net/locking/rcupdate.html
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/tick.h>
#define CREATE_TRACE_POINTS
#include "rcu.h"
MODULE_ALIAS("rcupdate");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcupdate."
module_param(rcu_expedited, int, 0);
#ifdef CONFIG_PREEMPT_RCU
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
/*
* Preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
static struct lock_class_key rcu_callback_key;
struct lockdep_map rcu_callback_map =
STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);
int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active && debug_locks &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
/**
* rcu_read_lock_held() - might we be in RCU read-side critical section?
*
* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
* this assumes we are in an RCU read-side critical section unless it can
* prove otherwise. This is useful for debug checks in functions that
* require that they be called within an RCU read-side critical section.
*
* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
*
* Note that rcu_read_lock() and the matching rcu_read_unlock() must
* occur in the same context, for example, it is illegal to invoke
* rcu_read_unlock() in process context if the matching rcu_read_lock()
* was invoked from within an irq handler.
*
* Note that rcu_read_lock() is disallowed if the CPU is either idle or
* offline from an RCU perspective, so check for those as well.
*/
int rcu_read_lock_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
if (!rcu_is_watching())
return 0;
if (!rcu_lockdep_current_cpu_online())
return 0;
return lock_is_held(&rcu_lock_map);
}
EXPORT_SYMBOL_GPL(rcu_read_lock_held);
/**
* rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
*
* Check for bottom half being disabled, which covers both the
* CONFIG_PROVE_RCU and not cases. Note that if someone uses
* rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
* will show the situation. This is useful for debug checks in functions
* that require that they be called within an RCU read-side critical
* section.
*
* Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
*
* Note that rcu_read_lock() is disallowed if the CPU is either idle or
* offline from an RCU perspective, so check for those as well.
*/
int rcu_read_lock_bh_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
if (!rcu_is_watching())
return 0;
if (!rcu_lockdep_current_cpu_online())
return 0;
return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
};
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
*/
static void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
void wait_rcu_gp(call_rcu_func_t crf)
{
struct rcu_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
crf(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(wait_rcu_gp);
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head)
{
debug_object_init(head, &rcuhead_debug_descr);
}
void destroy_rcu_head(struct rcu_head *head)
{
debug_object_free(head, &rcuhead_debug_descr);
}
/*
* fixup_activate is called when:
* - an active object is activated
* - an unknown object is activated (might be a statically initialized object)
* Activation is performed internally by call_rcu().
*/
static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
{
struct rcu_head *head = addr;
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
/*
* This is not really a fixup. We just make sure that it is
* tracked in the object tracker.
*/
debug_object_init(head, &rcuhead_debug_descr);
debug_object_activate(head, &rcuhead_debug_descr);
return 0;
default:
return 1;
}
}
/**
* init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
* @head: pointer to rcu_head structure to be initialized
*
* This function informs debugobjects of a new rcu_head structure that
* has been allocated as an auto variable on the stack. This function
* is not required for rcu_head structures that are statically defined or
* that are dynamically allocated on the heap. This function has no
* effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
*/
void init_rcu_head_on_stack(struct rcu_head *head)
{
debug_object_init_on_stack(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
/**
* destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
* @head: pointer to rcu_head structure to be initialized
*
* This function informs debugobjects that an on-stack rcu_head structure
* is about to go out of scope. As with init_rcu_head_on_stack(), this
* function is not required for rcu_head structures that are statically
* defined or that are dynamically allocated on the heap. Also as with
* init_rcu_head_on_stack(), this function has no effect for
* !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
*/
void destroy_rcu_head_on_stack(struct rcu_head *head)
{
debug_object_free(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
struct debug_obj_descr rcuhead_debug_descr = {
.name = "rcu_head",
.fixup_activate = rcuhead_fixup_activate,
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
unsigned long secs,
unsigned long c_old, unsigned long c)
{
trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
}
EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
do { } while (0)
#endif
#ifdef CONFIG_RCU_STALL_COMMON
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
#define RCU_STALL_DELAY_DELTA 0
#endif
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_suppress, int, 0644);
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_jiffies_till_stall_check(void)
{
int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
/*
* Limit check must be consistent with the Kconfig limits
* for CONFIG_RCU_CPU_STALL_TIMEOUT.
*/
if (till_stall_check < 3) {
ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
till_stall_check = 3;
} else if (till_stall_check > 300) {
ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
till_stall_check = 300;
}
return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
}
void rcu_sysrq_start(void)
{
if (!rcu_cpu_stall_suppress)
rcu_cpu_stall_suppress = 2;
}
void rcu_sysrq_end(void)
{
if (rcu_cpu_stall_suppress == 2)
rcu_cpu_stall_suppress = 0;
}
static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
{
rcu_cpu_stall_suppress = 1;
return NOTIFY_DONE;
}
static struct notifier_block rcu_panic_block = {
.notifier_call = rcu_panic,
};
static int __init check_cpu_stall_init(void)
{
atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
return 0;
}
early_initcall(check_cpu_stall_init);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
#ifdef CONFIG_TASKS_RCU
/*
* Simple variant of RCU whose quiescent states are voluntary context switch,
* user-space execution, and idle. As such, grace periods can take one good
* long time. There are no read-side primitives similar to rcu_read_lock()
* and rcu_read_unlock() because this implementation is intended to get
* the system into a safe state for some of the manipulations involved in
* tracing and the like. Finally, this implementation does not support
* high call_rcu_tasks() rates from multiple CPUs. If this is required,
* per-CPU callback lists will be needed.
*/
/* Global list of callbacks and associated lock. */
static struct rcu_head *rcu_tasks_cbs_head;
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_SRCU(tasks_rcu_exit_srcu);
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
module_param(rcu_task_stall_timeout, int, 0644);
static void rcu_spawn_tasks_kthread(void);
/*
* Post an RCU-tasks callback. First call must be from process context
* after the scheduler if fully operational.
*/
void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
{
unsigned long flags;
bool needwake;
rhp->next = NULL;
rhp->func = func;
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
needwake = !rcu_tasks_cbs_head;
*rcu_tasks_cbs_tail = rhp;
rcu_tasks_cbs_tail = &rhp->next;
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
if (needwake) {
rcu_spawn_tasks_kthread();
wake_up(&rcu_tasks_cbs_wq);
}
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);
/**
* synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
*
* Control will return to the caller some time after a full rcu-tasks
* grace period has elapsed, in other words after all currently
* executing rcu-tasks read-side critical sections have elapsed. These
* read-side critical sections are delimited by calls to schedule(),
* cond_resched_rcu_qs(), idle execution, userspace execution, calls
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
*
* This is a very specialized primitive, intended only for a few uses in
* tracing and other situations requiring manipulation of function
* preambles and profiling hooks. The synchronize_rcu_tasks() function
* is not (yet) intended for heavy use from multiple CPUs.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_rcu_tasks() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-tasks read-side critical section whose beginning
* preceded the call to synchronize_rcu_tasks(). In addition, each CPU
* having an RCU-tasks read-side critical section that extends beyond
* the return from synchronize_rcu_tasks() is guaranteed to have executed
* a full memory barrier after the beginning of synchronize_rcu_tasks()
* and before the beginning of that RCU-tasks read-side critical section.
* Note that these guarantees include CPUs that are offline, idle, or
* executing in user mode, as well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
* (but again only if the system has more than one CPU).
*/
void synchronize_rcu_tasks(void)
{
/* Complain if the scheduler has not started. */
rcu_lockdep_assert(!rcu_scheduler_active,
"synchronize_rcu_tasks called too soon");
/* Wait for the grace period. */
wait_rcu_gp(call_rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
/**
* rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
*
* Although the current implementation is guaranteed to wait, it is not
* obligated to, for example, if there are no pending callbacks.
*/
void rcu_barrier_tasks(void)
{
/* There is only one callback queue, so this is easy. ;-) */
synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
bool needreport, bool *firstreport)
{
int cpu;
if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
!ACCESS_ONCE(t->on_rq) ||
(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
!is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
ACCESS_ONCE(t->rcu_tasks_holdout) = false;
list_del_init(&t->rcu_tasks_holdout_list);
put_task_struct(t);
return;
}
if (!needreport)
return;
if (*firstreport) {
pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
*firstreport = false;
}
cpu = task_cpu(t);
pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
t, ".I"[is_idle_task(t)],
"N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
t->rcu_tasks_idle_cpu, cpu);
sched_show_task(t);
}
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
unsigned long flags;
struct task_struct *g, *t;
unsigned long lastreport;
struct rcu_head *list;
struct rcu_head *next;
LIST_HEAD(rcu_tasks_holdouts);
/* FIXME: Add housekeeping affinity. */
/*
* Each pass through the following loop makes one check for
* newly arrived callbacks, and, if there are some, waits for
* one RCU-tasks grace period and then invokes the callbacks.
* This loop is terminated by the system going down. ;-)
*/
for (;;) {
/* Pick up any new callbacks. */
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
list = rcu_tasks_cbs_head;
rcu_tasks_cbs_head = NULL;
rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
/* If there were none, wait a bit and start over. */
if (!list) {
wait_event_interruptible(rcu_tasks_cbs_wq,
rcu_tasks_cbs_head);
if (!rcu_tasks_cbs_head) {
WARN_ON(signal_pending(current));
schedule_timeout_interruptible(HZ/10);
}
continue;
}
/*
* Wait for all pre-existing t->on_rq and t->nvcsw
* transitions to complete. Invoking synchronize_sched()
* suffices because all these transitions occur with
* interrupts disabled. Without this synchronize_sched(),
* a read-side critical section that started before the
* grace period might be incorrectly seen as having started
* after the grace period.
*
* This synchronize_sched() also dispenses with the
* need for a memory barrier on the first store to
* ->rcu_tasks_holdout, as it forces the store to happen
* after the beginning of the grace period.
*/
synchronize_sched();
/*
* There were callbacks, so we need to wait for an
* RCU-tasks grace period. Start off by scanning
* the task list for tasks that are not already
* voluntarily blocked. Mark these tasks and make
* a list of them in rcu_tasks_holdouts.
*/
rcu_read_lock();
for_each_process_thread(g, t) {
if (t != current && ACCESS_ONCE(t->on_rq) &&
!is_idle_task(t)) {
get_task_struct(t);
t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
ACCESS_ONCE(t->rcu_tasks_holdout) = true;
list_add(&t->rcu_tasks_holdout_list,
&rcu_tasks_holdouts);
}
}
rcu_read_unlock();
/*
* Wait for tasks that are in the process of exiting.
* This does only part of the job, ensuring that all
* tasks that were previously exiting reach the point
* where they have disabled preemption, allowing the
* later synchronize_sched() to finish the job.
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
/*
* Each pass through the following loop scans the list
* of holdout tasks, removing any that are no longer
* holdouts. When the list is empty, we are done.
*/
lastreport = jiffies;
while (!list_empty(&rcu_tasks_holdouts)) {
bool firstreport;
bool needreport;
int rtst;
struct task_struct *t1;
schedule_timeout_interruptible(HZ);
rtst = ACCESS_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 &&
time_after(jiffies, lastreport + rtst);
if (needreport)
lastreport = jiffies;
firstreport = true;
WARN_ON(signal_pending(current));
list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
rcu_tasks_holdout_list) {
check_holdout_task(t, needreport, &firstreport);
cond_resched();
}
}
/*
* Because ->on_rq and ->nvcsw are not guaranteed
* to have a full memory barriers prior to them in the
* schedule() path, memory reordering on other CPUs could
* cause their RCU-tasks read-side critical sections to
* extend past the end of the grace period. However,
* because these ->nvcsw updates are carried out with
* interrupts disabled, we can use synchronize_sched()
* to force the needed ordering on all such CPUs.
*
* This synchronize_sched() also confines all
* ->rcu_tasks_holdout accesses to be within the grace
* period, avoiding the need for memory barriers for
* ->rcu_tasks_holdout accesses.
*
* In addition, this synchronize_sched() waits for exiting
* tasks to complete their final preempt_disable() region
* of execution, cleaning up after the synchronize_srcu()
* above.
*/
synchronize_sched();
/* Invoke the callbacks. */
while (list) {
next = list->next;
local_bh_disable();
list->func(list);
local_bh_enable();
list = next;
cond_resched();
}
schedule_timeout_uninterruptible(HZ/10);
}
}
/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
static void rcu_spawn_tasks_kthread(void)
{
static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
static struct task_struct *rcu_tasks_kthread_ptr;
struct task_struct *t;
if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
smp_mb(); /* Ensure caller sees full kthread. */
return;
}
mutex_lock(&rcu_tasks_kthread_mutex);
if (rcu_tasks_kthread_ptr) {
mutex_unlock(&rcu_tasks_kthread_mutex);
return;
}
t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
BUG_ON(IS_ERR(t));
smp_mb(); /* Ensure others see full kthread. */
ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
mutex_unlock(&rcu_tasks_kthread_mutex);
}
#endif /* #ifdef CONFIG_TASKS_RCU */