Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

205
kernel/time/Kconfig Normal file
View file

@ -0,0 +1,205 @@
#
# Timer subsystem related configuration options
#
# Options selectable by arch Kconfig
# Watchdog function for clocksources to detect instabilities
config CLOCKSOURCE_WATCHDOG
bool
# Architecture has extra clocksource data
config ARCH_CLOCKSOURCE_DATA
bool
# Clocksources require validation of the clocksource against the last
# cycle update - x86/TSC misfeature
config CLOCKSOURCE_VALIDATE_LAST_CYCLE
bool
# Timekeeping vsyscall support
config GENERIC_TIME_VSYSCALL
bool
# Timekeeping vsyscall support (old-style interface)
config GENERIC_TIME_VSYSCALL_OLD
bool
# Old style timekeeping
config ARCH_USES_GETTIMEOFFSET
bool
# The generic clock events infrastructure
config GENERIC_CLOCKEVENTS
bool
# Migration helper. Builds, but does not invoke
config GENERIC_CLOCKEVENTS_BUILD
bool
default y
depends on GENERIC_CLOCKEVENTS
# Architecture can handle broadcast in a driver-agnostic way
config ARCH_HAS_TICK_BROADCAST
bool
# Clockevents broadcasting infrastructure
config GENERIC_CLOCKEVENTS_BROADCAST
bool
depends on GENERIC_CLOCKEVENTS
# Automatically adjust the min. reprogramming time for
# clock event device
config GENERIC_CLOCKEVENTS_MIN_ADJUST
bool
# Generic update of CMOS clock
config GENERIC_CMOS_UPDATE
bool
if GENERIC_CLOCKEVENTS
menu "Timers subsystem"
# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
# only related to the tick functionality. Oneshot clockevent devices
# are supported independent of this.
config TICK_ONESHOT
bool
config NO_HZ_COMMON
bool
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
choice
prompt "Timer tick handling"
default NO_HZ_IDLE if NO_HZ
config HZ_PERIODIC
bool "Periodic timer ticks (constant rate, no dynticks)"
help
This option keeps the tick running periodically at a constant
rate, even when the CPU doesn't need it.
config NO_HZ_IDLE
bool "Idle dynticks system (tickless idle)"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select NO_HZ_COMMON
help
This option enables a tickless idle system: timer interrupts
will only trigger on an as-needed basis when the system is idle.
This is usually interesting for energy saving.
Most of the time you want to say Y here.
config NO_HZ_FULL
bool "Full dynticks system (tickless)"
# NO_HZ_COMMON dependency
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
# We need at least one periodic CPU for timekeeping
depends on SMP
# RCU_USER_QS dependency
depends on HAVE_CONTEXT_TRACKING
# VIRT_CPU_ACCOUNTING_GEN dependency
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
select NO_HZ_COMMON
select RCU_USER_QS
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
select IRQ_WORK
help
Adaptively try to shutdown the tick whenever possible, even when
the CPU is running tasks. Typically this requires running a single
task on the CPU. Chances for running tickless are maximized when
the task mostly runs in userspace and has few kernel activity.
You need to fill up the nohz_full boot parameter with the
desired range of dynticks CPUs.
This is implemented at the expense of some overhead in user <-> kernel
transitions: syscalls, exceptions and interrupts. Even when it's
dynamically off.
Say N.
endchoice
config NO_HZ_FULL_ALL
bool "Full dynticks system on all CPUs by default (except CPU 0)"
depends on NO_HZ_FULL
help
If the user doesn't pass the nohz_full boot option to
define the range of full dynticks CPUs, consider that all
CPUs in the system are full dynticks by default.
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
config NO_HZ_FULL_SYSIDLE
bool "Detect full-system idle state for full dynticks system"
depends on NO_HZ_FULL
default n
help
At least one CPU must keep the scheduling-clock tick running for
timekeeping purposes whenever there is a non-idle CPU, where
"non-idle" also includes dynticks CPUs as long as they are
running non-idle tasks. Because the underlying adaptive-tick
support cannot distinguish between all CPUs being idle and
all CPUs each running a single task in dynticks mode, the
underlying support simply ensures that there is always a CPU
handling the scheduling-clock tick, whether or not all CPUs
are idle. This Kconfig option enables scalable detection of
the all-CPUs-idle state, thus allowing the scheduling-clock
tick to be disabled when all CPUs are idle. Note that scalable
detection of the all-CPUs-idle state means that larger systems
will be slower to declare the all-CPUs-idle state.
Say Y if you would like to help debug all-CPUs-idle detection.
Say N if you are unsure.
config NO_HZ_FULL_SYSIDLE_SMALL
int "Number of CPUs above which large-system approach is used"
depends on NO_HZ_FULL_SYSIDLE
range 1 NR_CPUS
default 8
help
The full-system idle detection mechanism takes a lazy approach
on large systems, as is required to attain decent scalability.
However, on smaller systems, scalability is not anywhere near as
large a concern as is energy efficiency. The sysidle subsystem
therefore uses a fast but non-scalable algorithm for small
systems and a lazier but scalable algorithm for large systems.
This Kconfig parameter defines the number of CPUs in the largest
system that will be considered to be "small".
The default value will be fine in most cases. Battery-powered
systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
numbers of CPUs, and (3) are suffering from battery-lifetime
problems due to long sysidle latencies might wish to experiment
with larger values for this Kconfig parameter. On the other
hand, they might be even better served by disabling NO_HZ_FULL
entirely, given that NO_HZ_FULL is intended for HPC and
real-time workloads that at present do not tend to be run on
battery-powered systems.
Take the default if you are unsure.
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
help
This is the old config entry that enables dynticks idle.
We keep it around for a little while to enforce backward
compatibility with older config files.
config HIGH_RES_TIMERS
bool "High Resolution Timer Support"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
help
This option enables high resolution timer support. If your
hardware is not capable then this option only increases
the size of the kernel image.
endmenu
endif

33
kernel/time/Makefile Normal file
View file

@ -0,0 +1,33 @@
# Objects that are always built into the kernel.
obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timeconv.o posix-clock.o alarmtimer.o
# Clock event and tick objects, each gated by its own config symbol.
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
# The hrtimer based broadcast object is only added when both broadcast
# support and TICK_ONESHOT are enabled.
ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
obj-y += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += udelay_test.o
# time.o has the generated timeconst.h as a prerequisite.
$(obj)/time.o: $(obj)/timeconst.h
# hzfile: write "hz=<CONFIG_HZ>" into the target file.
quiet_cmd_hzfile = HZFILE $@
cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
targets += hz.bc
# hz.bc is rebuilt (via if_changed) against include/config/hz.h.
$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
$(call if_changed,hzfile)
# bc: run bc over all prerequisites except FORCE and capture its output.
quiet_cmd_bc = BC $@
cmd_bc = bc -q $(filter-out FORCE,$^) > $@
targets += timeconst.h
# timeconst.h is produced by feeding hz.bc and timeconst.bc to bc.
$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
$(call if_changed,bc)

873
kernel/time/alarmtimer.c Normal file
View file

@ -0,0 +1,873 @@
/*
* Alarmtimer interface
*
 * This interface provides a timer which is similar to hrtimers,
 * but triggers a RTC alarm if the box is suspended.
*
* This interface is influenced by the Android RTC Alarm timer
* interface.
*
* Copyright (C) 2010 IBM Corperation
*
* Author: John Stultz <john.stultz@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/timerqueue.h>
#include <linux/rtc.h>
#include <linux/alarmtimer.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/posix-timers.h>
#include <linux/workqueue.h>
#include <linux/freezer.h>
/**
 * struct alarm_base - Alarm timer bases
 * @lock:		Lock for synchronized access to the base
 * @timerqueue:		Timerqueue head managing the list of events
 * @gettime:		Function to read the time correlating to the base
 * @base_clockid:	clockid for the base
 *
 * One instance exists per alarm type, stored in alarm_bases[] and indexed
 * by enum alarmtimer_type.
 */
static struct alarm_base {
spinlock_t lock;
struct timerqueue_head timerqueue;
ktime_t (*gettime)(void);
clockid_t base_clockid;
} alarm_bases[ALARM_NUMTYPE];
/*
 * freezer delta & lock used to handle clock_nanosleep triggered wakeups.
 * freezer_delta is written by alarmtimer_freezerset() and consumed (and
 * reset to zero) by alarmtimer_suspend(), always under freezer_delta_lock.
 */
static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);
/*
 * Wakeup source registered in alarmtimer_init() and used by
 * alarmtimer_suspend() to hold off suspend for a short time.
 */
static struct wakeup_source *ws;
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
/* taken around reads and writes of rtcdev */
static DEFINE_SPINLOCK(rtcdev_lock);
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
* If one has not already been chosen, it checks to see if a
* functional rtc device is available.
*/
struct rtc_device *alarmtimer_get_rtcdev(void)
{
unsigned long flags;
struct rtc_device *ret;
spin_lock_irqsave(&rtcdev_lock, flags);
ret = rtcdev;
spin_unlock_irqrestore(&rtcdev_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(alarmtimer_get_rtcdev);
/*
 * alarmtimer_rtc_add_device - add_dev hook of the rtc class interface
 * @dev:	device embedded in the rtc being offered
 * @class_intf:	unused
 *
 * Adopts the offered rtc as the alarmtimer rtc device if none has been
 * chosen yet. Returns -EBUSY when a device is already selected, -1 when
 * the rtc has no set_alarm operation or its parent cannot wake the
 * system, and 0 otherwise.
 */
static int alarmtimer_rtc_add_device(struct device *dev,
				struct class_interface *class_intf)
{
	struct rtc_device *candidate = to_rtc_device(dev);
	unsigned long irqflags;

	/* unlocked early-out; the choice is re-checked under the lock below */
	if (rtcdev)
		return -EBUSY;

	if (!candidate->ops->set_alarm ||
	    !device_may_wakeup(candidate->dev.parent))
		return -1;

	spin_lock_irqsave(&rtcdev_lock, irqflags);
	if (!rtcdev) {
		rtcdev = candidate;
		/* hold a reference so it doesn't go away */
		get_device(dev);
	}
	spin_unlock_irqrestore(&rtcdev_lock, irqflags);

	return 0;
}
/* Initialize the rtc timer without a callback function or private data. */
static inline void alarmtimer_rtc_timer_init(void)
{
	rtc_timer_init(&rtctimer, NULL, NULL);
}

/* Class interface whose add_dev hook offers rtc devices to the alarmtimer. */
static struct class_interface alarmtimer_rtc_interface = {
	.add_dev = alarmtimer_rtc_add_device,
};

/* Bind the interface to rtc_class and register it; returns the register result. */
static int alarmtimer_rtc_interface_setup(void)
{
	int err;

	alarmtimer_rtc_interface.class = rtc_class;
	err = class_interface_register(&alarmtimer_rtc_interface);

	return err;
}

/* Undo alarmtimer_rtc_interface_setup(). */
static void alarmtimer_rtc_interface_remove(void)
{
	class_interface_unregister(&alarmtimer_rtc_interface);
}
#else
/* Without CONFIG_RTC_CLASS there is never an rtc device to hand out. */
struct rtc_device *alarmtimer_get_rtcdev(void)
{
return NULL;
}
/* Lets the direct rtcdev checks in this file compile; always NULL here. */
#define rtcdev (NULL)
/* No-op stand-ins for the rtc class interface helpers. */
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
static inline void alarmtimer_rtc_timer_init(void) { }
#endif
/**
 * alarmtimer_enqueue - Put an alarm on the timerqueue of its base
 * @base: base whose timerqueue receives the alarm
 * @alarm: alarm to queue
 *
 * An alarm that is already marked as enqueued is removed first and then
 * added again. The alarm is flagged ALARMTIMER_STATE_ENQUEUED afterwards.
 *
 * Must hold base->lock when calling.
 */
static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
{
	struct timerqueue_head *queue = &base->timerqueue;

	/* drop the existing entry before adding the node again */
	if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
		timerqueue_del(queue, &alarm->node);

	timerqueue_add(queue, &alarm->node);
	alarm->state |= ALARMTIMER_STATE_ENQUEUED;
}
/**
 * alarmtimer_dequeue - Take an alarm off the timerqueue of its base
 * @base: base whose timerqueue holds the alarm
 * @alarm: alarm to remove
 *
 * Does nothing when the alarm is not flagged ALARMTIMER_STATE_ENQUEUED;
 * otherwise removes it from the timerqueue and clears the flag.
 *
 * Must hold base->lock when calling.
 */
static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
{
	if (alarm->state & ALARMTIMER_STATE_ENQUEUED) {
		timerqueue_del(&base->timerqueue, &alarm->node);
		alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
	}
}
/**
 * alarmtimer_fired - hrtimer callback for an expired alarm
 * @timer: hrtimer embedded in the alarm that expired
 *
 * Dequeues the alarm, runs its callback (if any) with the current time
 * of the alarm's base and, when the callback asks for a restart,
 * re-arms the hrtimer at alarm->node.expires and queues the alarm again.
 * The callback itself runs without base->lock held.
 */
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
	struct alarm *alarm = container_of(timer, struct alarm, timer);
	struct alarm_base *base = &alarm_bases[alarm->type];
	int rearm = ALARMTIMER_NORESTART;
	int hr_action = HRTIMER_NORESTART;
	unsigned long irqflags;

	spin_lock_irqsave(&base->lock, irqflags);
	alarmtimer_dequeue(base, alarm);
	spin_unlock_irqrestore(&base->lock, irqflags);

	if (alarm->function)
		rearm = alarm->function(alarm, base->gettime());

	spin_lock_irqsave(&base->lock, irqflags);
	if (rearm != ALARMTIMER_NORESTART) {
		hrtimer_set_expires(&alarm->timer, alarm->node.expires);
		alarmtimer_enqueue(base, alarm);
		hr_action = HRTIMER_RESTART;
	}
	spin_unlock_irqrestore(&base->lock, irqflags);

	return hr_action;
}
ktime_t alarm_expires_remaining(const struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
return ktime_sub(alarm->node.expires, base->gettime());
}
EXPORT_SYMBOL_GPL(alarm_expires_remaining);
#ifdef CONFIG_RTC_CLASS
/**
 * alarmtimer_suspend - Suspend time callback
 * @dev: unused
 *
 * Starting from the pending freezer delta (which is consumed and reset),
 * look through the bases for the soonest timer to expire and program an
 * rtc timer that far into the future so it wakes us from suspend.
 *
 * Returns 0 when there is no rtc device or nothing to wait for, -EBUSY
 * (after a 2 second wakeup event) when the soonest expiry is less than
 * two seconds away, and otherwise the result of rtc_timer_start(); a
 * negative result additionally raises a 1 second wakeup event.
 */
static int alarmtimer_suspend(struct device *dev)
{
	struct rtc_device *rtc;
	struct rtc_time tm;
	ktime_t soonest, wake_at;
	unsigned long irqflags;
	int type;
	int rc;

	spin_lock_irqsave(&freezer_delta_lock, irqflags);
	soonest = freezer_delta;
	freezer_delta = ktime_set(0, 0);
	spin_unlock_irqrestore(&freezer_delta_lock, irqflags);

	rtc = alarmtimer_get_rtcdev();
	/* If we have no rtcdev, just return */
	if (!rtc)
		return 0;

	/* Find the soonest timer to expire */
	for (type = 0; type < ALARM_NUMTYPE; type++) {
		struct alarm_base *base = &alarm_bases[type];
		struct timerqueue_node *head;

		spin_lock_irqsave(&base->lock, irqflags);
		head = timerqueue_getnext(&base->timerqueue);
		spin_unlock_irqrestore(&base->lock, irqflags);

		if (head) {
			ktime_t delta = ktime_sub(head->expires,
						  base->gettime());

			if (!soonest.tv64 || (delta.tv64 < soonest.tv64))
				soonest = delta;
		}
	}

	if (soonest.tv64 == 0)
		return 0;

	if (ktime_to_ns(soonest) < 2 * NSEC_PER_SEC) {
		__pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
		return -EBUSY;
	}

	/* Setup an rtc timer to fire that far in the future */
	rtc_timer_cancel(rtc, &rtctimer);
	rtc_read_time(rtc, &tm);
	wake_at = rtc_tm_to_ktime(tm);
	wake_at = ktime_add(wake_at, soonest);

	/* Set alarm, if in the past reject suspend briefly to handle */
	rc = rtc_timer_start(rtc, &rtctimer, wake_at, ktime_set(0, 0));
	if (rc < 0)
		__pm_wakeup_event(ws, MSEC_PER_SEC);

	return rc;
}
#else
/* Without CONFIG_RTC_CLASS there is nothing to program at suspend time. */
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
#endif
/*
 * alarmtimer_freezerset - record a pending nanosleep expiry for suspend
 * @absexp:	absolute expiry of the sleeping timer
 * @type:	alarm type, selects the base used to read "now"
 *
 * Stores the time remaining until @absexp in freezer_delta if no delta is
 * recorded yet or if this one is smaller than the recorded one.
 */
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
{
	ktime_t remaining = ktime_sub(absexp, alarm_bases[type].gettime());
	unsigned long irqflags;

	spin_lock_irqsave(&freezer_delta_lock, irqflags);
	if (freezer_delta.tv64 == 0 || remaining.tv64 < freezer_delta.tv64)
		freezer_delta = remaining;
	spin_unlock_irqrestore(&freezer_delta_lock, irqflags);
}
/**
 * alarm_init - Set up an alarm structure before first use
 * @alarm: alarm to initialize
 * @type: alarm type; selects the base and thereby the hrtimer clockid
 * @function: callback invoked from alarmtimer_fired() when the alarm expires
 *
 * The embedded hrtimer is initialized in absolute mode with
 * alarmtimer_fired() as its handler and the alarm starts out inactive.
 */
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
		enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
{
	alarm->type = type;
	alarm->function = function;
	alarm->state = ALARMTIMER_STATE_INACTIVE;

	timerqueue_init(&alarm->node);

	/* the handler is assigned only after hrtimer_init() has run */
	hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
		     HRTIMER_MODE_ABS);
	alarm->timer.function = alarmtimer_fired;
}
EXPORT_SYMBOL_GPL(alarm_init);
/**
 * alarm_start - Arm an alarm for an absolute expiry time
 * @alarm: alarm to arm
 * @start: absolute time at which the alarm should fire
 *
 * Records @start as the expiry, queues the alarm on its base and starts
 * the embedded hrtimer, all under the base lock. Returns whatever
 * hrtimer_start() returned.
 */
int alarm_start(struct alarm *alarm, ktime_t start)
{
	struct alarm_base *base = &alarm_bases[alarm->type];
	unsigned long irqflags;
	int rc;

	spin_lock_irqsave(&base->lock, irqflags);

	alarm->node.expires = start;
	alarmtimer_enqueue(base, alarm);
	rc = hrtimer_start(&alarm->timer, alarm->node.expires,
			   HRTIMER_MODE_ABS);

	spin_unlock_irqrestore(&base->lock, irqflags);

	return rc;
}
EXPORT_SYMBOL_GPL(alarm_start);
/**
 * alarm_start_relative - Sets a relative alarm to fire
 * @alarm: ptr to alarm to set
 * @start: time relative to now to run the alarm
 *
 * Converts @start to an absolute time on the alarm's base clock and
 * passes it to alarm_start(), whose return value is returned.
 */
int alarm_start_relative(struct alarm *alarm, ktime_t start)
{
	struct alarm_base *base = &alarm_bases[alarm->type];

	/*
	 * Use the saturating add: a huge relative value added to the
	 * current time must not wrap around into a negative expiry.
	 */
	start = ktime_add_safe(start, base->gettime());
	return alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
/**
 * alarm_restart - Re-arm an alarm at its current expiry
 * @alarm: alarm to restart
 *
 * Copies alarm->node.expires into the embedded hrtimer, restarts that
 * hrtimer and queues the alarm on its base, all under the base lock.
 */
void alarm_restart(struct alarm *alarm)
{
	struct alarm_base *base = &alarm_bases[alarm->type];
	unsigned long irqflags;

	spin_lock_irqsave(&base->lock, irqflags);

	hrtimer_set_expires(&alarm->timer, alarm->node.expires);
	hrtimer_restart(&alarm->timer);
	alarmtimer_enqueue(base, alarm);

	spin_unlock_irqrestore(&base->lock, irqflags);
}
EXPORT_SYMBOL_GPL(alarm_restart);
/**
 * alarm_try_to_cancel - Attempt to cancel an alarm without waiting
 * @alarm: alarm to cancel
 *
 * Returns 1 if the timer was canceled, 0 if it was not running,
 * and -1 if the callback was running. The alarm is only taken off its
 * base when the hrtimer cancel attempt did not report a running callback.
 */
int alarm_try_to_cancel(struct alarm *alarm)
{
	struct alarm_base *base = &alarm_bases[alarm->type];
	unsigned long irqflags;
	int cancelled;

	spin_lock_irqsave(&base->lock, irqflags);

	cancelled = hrtimer_try_to_cancel(&alarm->timer);
	if (cancelled >= 0)
		alarmtimer_dequeue(base, alarm);

	spin_unlock_irqrestore(&base->lock, irqflags);

	return cancelled;
}
EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
/**
 * alarm_cancel - Cancel an alarm, retrying until the attempt succeeds
 * @alarm: alarm to cancel
 *
 * Keeps calling alarm_try_to_cancel(), relaxing the cpu between
 * attempts, for as long as it reports a negative result.
 *
 * Returns 1 if the timer was canceled, 0 if it was not active.
 */
int alarm_cancel(struct alarm *alarm)
{
	int rc;

	while ((rc = alarm_try_to_cancel(alarm)) < 0)
		cpu_relax();

	return rc;
}
EXPORT_SYMBOL_GPL(alarm_cancel);
/**
 * alarm_forward - Advance an alarm's expiry past a given time
 * @alarm: alarm whose expiry is moved
 * @now: reference time the expiry is compared against
 * @interval: step by which the expiry is advanced
 *
 * Returns 0 and leaves the alarm untouched when its expiry still lies
 * after @now. Otherwise the expiry is advanced in steps of @interval and
 * the number of steps taken is returned.
 */
u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
{
	ktime_t behind = ktime_sub(now, alarm->node.expires);
	u64 steps = 1;

	if (behind.tv64 < 0)
		return 0;

	if (unlikely(behind.tv64 >= interval.tv64)) {
		s64 step_ns = ktime_to_ns(interval);

		/* skip all whole intervals that have already elapsed */
		steps = ktime_divns(behind, step_ns);
		alarm->node.expires = ktime_add_ns(alarm->node.expires,
						   step_ns * steps);

		if (alarm->node.expires.tv64 > now.tv64)
			return steps;
		/*
		 * This (and the ktime_add() below) is the
		 * correction for exact:
		 */
		steps++;
	}

	alarm->node.expires = ktime_add(alarm->node.expires, interval);
	return steps;
}
EXPORT_SYMBOL_GPL(alarm_forward);
/**
 * alarm_forward_now - alarm_forward() relative to the current time
 * @alarm: alarm whose expiry is moved
 * @interval: step by which the expiry is advanced
 *
 * Reads "now" from the alarm's base and returns alarm_forward()'s result.
 */
u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
{
	ktime_t now = alarm_bases[alarm->type].gettime();

	return alarm_forward(alarm, now, interval);
}
EXPORT_SYMBOL_GPL(alarm_forward_now);
/**
 * clock2alarm - map an _ALARM posix clockid to its alarm type
 * @clockid: clockid to translate
 *
 * Returns ALARM_REALTIME or ALARM_BOOTTIME for the two alarm clockids
 * and -1 for anything else.
 */
static enum alarmtimer_type clock2alarm(clockid_t clockid)
{
	switch (clockid) {
	case CLOCK_REALTIME_ALARM:
		return ALARM_REALTIME;
	case CLOCK_BOOTTIME_ALARM:
		return ALARM_BOOTTIME;
	default:
		return -1;
	}
}
/**
 * alarm_handle_timer - Alarm callback used for posix timers
 * @alarm: alarm that fired
 * @now: time at base when the callback was invoked
 *
 * Delivers the timer event unless the timer uses SIGEV_NONE, counting an
 * overrun when posix_timer_event() reports a non-zero result. Timers with
 * a non-zero interval are forwarded past @now and ask to be restarted.
 */
static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
							ktime_t now)
{
	struct k_itimer *timr = container_of(alarm, struct k_itimer,
						it.alarm.alarmtimer);
	enum alarmtimer_restart verdict = ALARMTIMER_NORESTART;
	unsigned long irqflags;

	spin_lock_irqsave(&timr->it_lock, irqflags);

	if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
	    posix_timer_event(timr, 0) != 0)
		timr->it_overrun++;

	/* Re-add periodic timers */
	if (timr->it.alarm.interval.tv64) {
		timr->it_overrun += alarm_forward(alarm, now,
						  timr->it.alarm.interval);
		verdict = ALARMTIMER_RESTART;
	}

	spin_unlock_irqrestore(&timr->it_lock, irqflags);

	return verdict;
}
/**
* alarm_clock_getres - posix getres interface
* @which_clock: clockid
* @tp: timespec to fill
*
* Returns the granularity of underlying alarm base clock
*/
static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
if (!alarmtimer_get_rtcdev())
return -EINVAL;
return hrtimer_get_res(baseid, tp);
}
/**
 * alarm_clock_get - posix clock_get hook for the alarm clocks
 * @which_clock: clockid
 * @tp: timespec to fill.
 *
 * Stores the current time of the underlying alarm base in @tp.
 * Returns -EINVAL when no rtc device is available, 0 otherwise.
 */
static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
{
	enum alarmtimer_type type = clock2alarm(which_clock);
	struct alarm_base *base = &alarm_bases[type];

	if (alarmtimer_get_rtcdev() == NULL)
		return -EINVAL;

	*tp = ktime_to_timespec(base->gettime());

	return 0;
}
/**
 * alarm_timer_create - posix timer_create interface
 * @new_timer: k_itimer pointer to manage
 *
 * Initializes the alarm embedded in the k_itimer structure with
 * alarm_handle_timer() as its callback.
 *
 * Returns -ENOTSUPP when no rtc device is available, -EPERM when the
 * caller lacks CAP_WAKE_ALARM, and 0 on success.
 */
static int alarm_timer_create(struct k_itimer *new_timer)
{
	enum alarmtimer_type type;

	if (!alarmtimer_get_rtcdev())
		return -ENOTSUPP;

	if (!capable(CAP_WAKE_ALARM))
		return -EPERM;

	type = clock2alarm(new_timer->it_clock);
	alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
	return 0;
}
/**
 * alarm_timer_get - posix timer_get hook
 * @timr: k_itimer pointer
 * @cur_setting: itimerspec data to fill
 *
 * Fills in the time remaining until expiry (zero when the expiry is not
 * in the future) and the timer's interval.
 */
static void alarm_timer_get(struct k_itimer *timr,
				struct itimerspec *cur_setting)
{
	ktime_t remaining = alarm_expires_remaining(&timr->it.alarm.alarmtimer);

	if (ktime_to_ns(remaining) <= 0) {
		cur_setting->it_value.tv_sec = 0;
		cur_setting->it_value.tv_nsec = 0;
	} else {
		cur_setting->it_value = ktime_to_timespec(remaining);
	}

	cur_setting->it_interval = ktime_to_timespec(timr->it.alarm.interval);
}
/**
 * alarm_timer_del - posix timer_del hook
 * @timr: k_itimer pointer to be deleted
 *
 * Tries to cancel the alarm programmed for the given timer. Returns
 * -ENOTSUPP without an rtc device, TIMER_RETRY when the cancel attempt
 * reported a negative result, and 0 otherwise.
 */
static int alarm_timer_del(struct k_itimer *timr)
{
	int rc;

	if (!rtcdev)
		return -ENOTSUPP;

	rc = alarm_try_to_cancel(&timr->it.alarm.alarmtimer);

	return (rc < 0) ? TIMER_RETRY : 0;
}
/**
 * alarm_timer_set - posix timer_set interface
 * @timr: k_itimer pointer to be programmed
 * @flags: timer flags; only TIMER_ABSTIME is accepted
 * @new_setting: itimerspec to be used
 * @old_setting: itimerspec being replaced (may be NULL)
 *
 * Sets the timer to new_setting, and starts the timer.
 *
 * Returns -ENOTSUPP without an rtc device, -EINVAL for unknown flags,
 * TIMER_RETRY when the previously programmed alarm could not be
 * canceled, and 0 on success.
 */
static int alarm_timer_set(struct k_itimer *timr, int flags,
				struct itimerspec *new_setting,
				struct itimerspec *old_setting)
{
	ktime_t exp;

	if (!rtcdev)
		return -ENOTSUPP;

	if (flags & ~TIMER_ABSTIME)
		return -EINVAL;

	if (old_setting)
		alarm_timer_get(timr, old_setting);

	/* If the timer was already set, cancel it */
	if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0)
		return TIMER_RETRY;

	/* start the timer */
	timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
	exp = timespec_to_ktime(new_setting->it_value);
	/* Convert (if necessary) to absolute time */
	if (flags != TIMER_ABSTIME) {
		ktime_t now;

		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
		/*
		 * The relative value comes from userspace; use the
		 * saturating add so "now + exp" cannot wrap around into
		 * a negative expiry.
		 */
		exp = ktime_add_safe(now, exp);
	}

	alarm_start(&timr->it.alarm.alarmtimer, exp);
	return 0;
}
/**
 * alarmtimer_nsleep_wakeup - Alarm callback used by the nsleep paths
 * @alarm: ptr to alarm that fired
 * @now: unused
 *
 * Clears alarm->data and wakes the task that was stored there, if any.
 * The alarm is never restarted.
 */
static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
								ktime_t now)
{
	struct task_struct *sleeper = alarm->data;

	alarm->data = NULL;
	if (sleeper != NULL)
		wake_up_process(sleeper);

	return ALARMTIMER_NORESTART;
}
/**
 * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation
 * @alarm: ptr to alarmtimer
 * @absexp: absolute expiration time
 *
 * Sets the alarm timer and sleeps until it is fired or interrupted.
 *
 * alarm->data holds the sleeping task and is used as the "still waiting"
 * marker: the callers in this file initialize the alarm with
 * alarmtimer_nsleep_wakeup(), which clears alarm->data when the alarm
 * fires. Returns non-zero when alarm->data was cleared, and 0 when the
 * loop ended because a signal is pending.
 */
static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp)
{
alarm->data = (void *)current;
do {
/* mark the task sleeping before arming so a wakeup is not missed */
set_current_state(TASK_INTERRUPTIBLE);
alarm_start(alarm, absexp);
/* skip schedule() if alarm->data has already been cleared */
if (likely(alarm->data))
schedule();
alarm_cancel(alarm);
} while (alarm->data && !signal_pending(current));
__set_current_state(TASK_RUNNING);
return (alarm->data == NULL);
}
/**
 * update_rmtp - Report the remaining sleep time to userspace
 * @exp: expiration time
 * @type: timer type
 * @rmtp: user pointer to remaining timespec value
 *
 * Computes the time between now and @exp on the base selected by @type.
 * Returns 0 when nothing remains (nothing is copied in that case),
 * -EFAULT when the copy to @rmtp fails, and 1 when the value was copied.
 */
static int update_rmtp(ktime_t exp, enum alarmtimer_type type,
				struct timespec __user *rmtp)
{
	ktime_t left = ktime_sub(exp, alarm_bases[type].gettime());
	struct timespec left_ts;

	if (left.tv64 <= 0)
		return 0;

	left_ts = ktime_to_timespec(left);

	return copy_to_user(rmtp, &left_ts, sizeof(*rmtp)) ? -EFAULT : 1;
}
/**
 * alarm_timer_nsleep_restart - restart_block handler for alarmtimer nsleep
 * @restart: ptr to restart block
 *
 * Resumes a clock_nanosleep call using the alarm type, expiry and rmtp
 * pointer saved in @restart. Returns 0 when the sleep completed,
 * update_rmtp()'s result when that is zero or negative, and
 * -ERESTART_RESTARTBLOCK when the sleep has to be restarted once more.
 */
static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
{
	enum alarmtimer_type type = restart->nanosleep.clockid;
	struct timespec __user *rmtp;
	struct alarm alarm;
	ktime_t exp;

	exp.tv64 = restart->nanosleep.expires;
	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);

	if (alarmtimer_do_nsleep(&alarm, exp))
		return 0;

	if (freezing(current))
		alarmtimer_freezerset(exp, type);

	rmtp = restart->nanosleep.rmtp;
	if (rmtp) {
		int rc = update_rmtp(exp, type, rmtp);

		if (rc <= 0)
			return rc;
	}

	/* The other values in restart are already filled in */
	return -ERESTART_RESTARTBLOCK;
}
/**
 * alarm_timer_nsleep - alarmtimer nanosleep
 * @which_clock: clockid
 * @flags: determines abstime or relative
 * @tsreq: requested sleep time (abs or rel)
 * @rmtp: remaining sleep time saved
 *
 * Handles clock_nanosleep calls against _ALARM clockids
 *
 * Returns -ENOTSUPP without an rtc device, -EINVAL for unknown flags,
 * -EPERM without CAP_WAKE_ALARM, 0 when the sleep completed,
 * -ERESTARTNOHAND for an interrupted absolute sleep, update_rmtp()'s
 * result when that is zero or negative, and -ERESTART_RESTARTBLOCK
 * after the restart block has been set up for an interrupted relative
 * sleep.
 */
static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
		     struct timespec *tsreq, struct timespec __user *rmtp)
{
	enum alarmtimer_type type = clock2alarm(which_clock);
	struct alarm alarm;
	ktime_t exp;
	int ret = 0;
	struct restart_block *restart;

	if (!alarmtimer_get_rtcdev())
		return -ENOTSUPP;

	if (flags & ~TIMER_ABSTIME)
		return -EINVAL;

	if (!capable(CAP_WAKE_ALARM))
		return -EPERM;

	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);

	exp = timespec_to_ktime(*tsreq);
	/* Convert (if necessary) to absolute time */
	if (flags != TIMER_ABSTIME) {
		ktime_t now = alarm_bases[type].gettime();

		/*
		 * The relative value comes from userspace; use the
		 * saturating add so "now + exp" cannot wrap around into
		 * a negative expiry.
		 */
		exp = ktime_add_safe(now, exp);
	}

	if (alarmtimer_do_nsleep(&alarm, exp))
		goto out;

	if (freezing(current))
		alarmtimer_freezerset(exp, type);

	/* abs timers don't set remaining time or restart */
	if (flags == TIMER_ABSTIME) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	if (rmtp) {
		ret = update_rmtp(exp, type, rmtp);
		if (ret <= 0)
			goto out;
	}

	/* save what alarm_timer_nsleep_restart() needs to resume the sleep */
	restart = &current_thread_info()->restart_block;
	restart->fn = alarm_timer_nsleep_restart;
	restart->nanosleep.clockid = type;
	restart->nanosleep.expires = exp.tv64;
	restart->nanosleep.rmtp = rmtp;
	ret = -ERESTART_RESTARTBLOCK;

out:
	return ret;
}
/* Suspend hook structures */
/* Only the suspend callback is provided; it programs the rtc wakeup. */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
};
/*
 * Platform driver named "alarmtimer"; alarmtimer_init() registers it and a
 * platform device of the same name so that the pm ops above get called.
 */
static struct platform_driver alarmtimer_driver = {
.driver = {
.name = "alarmtimer",
.pm = &alarmtimer_pm_ops,
}
};
/**
 * alarmtimer_init - Initialize alarm timer code
 *
 * Registers the two _ALARM posix clock ids, sets up the alarm bases,
 * hooks into the rtc class, registers the "alarmtimer" platform driver
 * and device, and finally registers the wakeup source. On failure the
 * steps already taken for the rtc interface and platform driver are
 * undone and the error is returned.
 */
static int __init alarmtimer_init(void)
{
	struct k_clock alarm_clock = {
		.clock_getres	= alarm_clock_getres,
		.clock_get	= alarm_clock_get,
		.timer_create	= alarm_timer_create,
		.timer_set	= alarm_timer_set,
		.timer_del	= alarm_timer_del,
		.timer_get	= alarm_timer_get,
		.nsleep		= alarm_timer_nsleep,
	};
	struct platform_device *pdev;
	int err;
	int type;

	alarmtimer_rtc_timer_init();

	posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
	posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);

	/* Initialize alarm bases */
	alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
	alarm_bases[ALARM_REALTIME].gettime = ktime_get_real;
	alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
	alarm_bases[ALARM_BOOTTIME].gettime = ktime_get_boottime;
	for (type = 0; type < ALARM_NUMTYPE; type++) {
		timerqueue_init_head(&alarm_bases[type].timerqueue);
		spin_lock_init(&alarm_bases[type].lock);
	}

	err = alarmtimer_rtc_interface_setup();
	if (err)
		return err;

	err = platform_driver_register(&alarmtimer_driver);
	if (err) {
		alarmtimer_rtc_interface_remove();
		return err;
	}

	pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
	if (IS_ERR(pdev)) {
		err = PTR_ERR(pdev);
		platform_driver_unregister(&alarmtimer_driver);
		alarmtimer_rtc_interface_remove();
		return err;
	}

	ws = wakeup_source_register("alarmtimer");
	return 0;
}
device_initcall(alarmtimer_init);

739
kernel/time/clockevents.c Normal file
View file

@ -0,0 +1,739 @@
/*
* linux/kernel/time/clockevents.c
*
* This file contains functions which manage clock event devices.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/clockchips.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/device.h>
#include <linux/exynos-ss.h>
#include "tick-internal.h"
#include <trace/events/exynos.h>
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
/* Protection for the above */
static DEFINE_RAW_SPINLOCK(clockevents_lock);
/* Protection for unbind operations */
static DEFINE_MUTEX(clockevents_mutex);
/*
 * Pairs a clock event device with an integer result.
 * NOTE(review): the users are outside this view - presumably the argument
 * and status of an unbind request; confirm against the unbind code.
 */
struct ce_unbind {
struct clock_event_device *ce;
int res;
};
/*
 * cev_delta2ns - convert device ticks to nanoseconds using mult/shift
 * @latch:	number of device ticks to convert
 * @evt:	clock event device supplying mult and shift
 * @ismax:	true when the result is used as an upper limit
 *
 * A zero evt->mult is replaced by 1 (with a warning) before use. The
 * result is never smaller than 1000.
 */
static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt,
			bool ismax)
{
	u64 scaled = (u64) latch << evt->shift;
	u64 round_up;

	if (unlikely(!evt->mult)) {
		evt->mult = 1;
		WARN_ON(1);
	}
	round_up = (u64) evt->mult - 1;

	/*
	 * Upper bound sanity check. If the backwards conversion is
	 * not equal latch, we know that the above shift overflowed.
	 */
	if ((scaled >> evt->shift) != (u64)latch)
		scaled = ~0ULL;

	/*
	 * Scaled math oddities:
	 *
	 * For mult <= (1 << shift) we can safely add mult - 1 to
	 * prevent integer rounding loss. So the backwards conversion
	 * from nsec to device ticks will be correct.
	 *
	 * For mult > (1 << shift), i.e. device frequency is > 1GHz we
	 * need to be careful. Adding mult - 1 will result in a value
	 * which when converted back to device ticks can be larger
	 * than latch by up to (mult - 1) >> shift. For the min_delta
	 * calculation we still want to apply this in order to stay
	 * above the minimum device ticks limit. For the upper limit
	 * we would end up with a latch value larger than the upper
	 * limit of the device, so we omit the add to stay below the
	 * device upper boundary.
	 *
	 * Also omit the add if it would overflow the u64 boundary.
	 */
	if ((~0ULL - scaled > round_up) &&
	    (!ismax || evt->mult <= (1ULL << evt->shift)))
		scaled += round_up;

	do_div(scaled, evt->mult);

	/* Deltas less than 1usec are pointless noise */
	if (scaled > 1000)
		return scaled;
	return 1000;
}
/**
 * clockevent_delta2ns - Convert a latch value (device ticks) to nanoseconds
 * @latch:	value to convert
 * @evt:	pointer to clock event device descriptor
 *
 * Math helper, returns latch value converted to nanoseconds (bound checked).
 * The conversion is done as for a lower limit, i.e. with ismax == false.
 */
u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
{
	u64 nsecs = cev_delta2ns(latch, evt, false);

	return nsecs;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
/**
 * clockevents_set_mode - switch a clock event device to a new mode
 * @dev:	device to modify
 * @mode:	new mode
 *
 * Nothing happens when the device already is in @mode. Otherwise the
 * device's set_mode() hook is invoked and the new mode recorded.
 *
 * Must be called with interrupts disabled !
 */
void clockevents_set_mode(struct clock_event_device *dev,
				 enum clock_event_mode mode)
{
	if (dev->mode == mode)
		return;

	dev->set_mode(mode, dev);
	dev->mode = mode;

	/*
	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
	 * on it, so fix it up and emit a warning:
	 */
	if (mode == CLOCK_EVT_MODE_ONESHOT && unlikely(!dev->mult)) {
		dev->mult = 1;
		WARN_ON(1);
	}
}
/**
 * clockevents_shutdown - put the device into shutdown mode
 * @dev:	device to shutdown
 *
 * Switches the device to CLOCK_EVT_MODE_SHUTDOWN and afterwards clears
 * its next_event by setting it to KTIME_MAX.
 */
void clockevents_shutdown(struct clock_event_device *dev)
{
	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);

	dev->next_event.tv64 = KTIME_MAX;
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
/* Limit min_delta to a jiffie */
#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
/**
 * clockevents_increase_min_delta - raise minimum delta of a clock event device
 * @dev:	device whose minimum delta is raised
 *
 * The minimum delta is bumped to 5000 nsec if it is below that, otherwise
 * it grows by half of its current value; the result is capped at
 * MIN_DELTA_LIMIT.
 *
 * Returns 0 on success, -ETIME when the minimum delta reached the limit.
 */
static int clockevents_increase_min_delta(struct clock_event_device *dev)
{
	/* Already at the cap: give up and park next_event at KTIME_MAX */
	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
		printk_deferred(KERN_WARNING
				"CE: Reprogramming failure. Giving up\n");
		dev->next_event.tv64 = KTIME_MAX;
		return -ETIME;
	}

	if (dev->min_delta_ns >= 5000)
		dev->min_delta_ns += dev->min_delta_ns >> 1;
	else
		dev->min_delta_ns = 5000;

	if (dev->min_delta_ns > MIN_DELTA_LIMIT)
		dev->min_delta_ns = MIN_DELTA_LIMIT;

	printk_deferred(KERN_WARNING
			"CE: %s increased min_delta_ns to %llu nsec\n",
			dev->name ? dev->name : "?",
			(unsigned long long) dev->min_delta_ns);
	return 0;
}
/**
 * clockevents_program_min_delta - Set clock event device to the minimum delay.
 * @dev:	device to program
 *
 * Variant built when CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST is set: keeps
 * retrying with the current min_delta_ns and raises min_delta_ns after
 * every third failed attempt.
 *
 * Returns 0 on success (or when the device is shut down), -ETIME when the
 * retry loop failed.
 */
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta;
int i;
/* i counts failed attempts since the last min_delta_ns increase */
for (i = 0;;) {
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
/* A shut down device is not programmed; next_event is still recorded */
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
dev->retries++;
/* Scaled math: nanoseconds -> device ticks */
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
/* NOTE(review): vendor (Exynos) logging/trace hooks, defined elsewhere */
exynos_ss_clockevent(clc, delta, &dev->next_event);
trace_exynos_clockevent(clc, delta, &dev->next_event);
if (dev->set_next_event((unsigned long) clc, dev) == 0)
return 0;
if (++i > 2) {
/*
 * We tried 3 times to program the device with the
 * given min_delta_ns. Try to increase the minimum
 * delta, if that fails as well get out of here.
 */
if (clockevents_increase_min_delta(dev))
return -ETIME;
i = 0;
}
}
}
#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
/**
 * clockevents_program_min_delta - Set clock event device to the minimum delay.
 * @dev:	device to program
 *
 * Variant built without CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST: a single
 * programming attempt with the current min_delta_ns, no retries.
 *
 * Returns 0 when the device is shut down, otherwise the return value of
 * the device's set_next_event() callback.
 */
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta;
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
/* A shut down device is not programmed; next_event is still recorded */
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
dev->retries++;
/* Scaled math: nanoseconds -> device ticks */
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
/* NOTE(review): vendor (Exynos) logging/trace hooks, defined elsewhere */
exynos_ss_clockevent(clc, delta, &dev->next_event);
trace_exynos_clockevent(clc, delta, &dev->next_event);
return dev->set_next_event((unsigned long) clc, dev);
}
#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
/**
 * clockevents_program_event - Reprogram the clock event device.
 * @dev:	device to program
 * @expires:	absolute expiry time (monotonic clock)
 * @force:	program minimum delay if expires can not be set
 *
 * A negative @expires triggers a one-time warning and -ETIME. For a shut
 * down device only next_event is recorded. Devices with
 * CLOCK_EVT_FEAT_KTIME are handed @expires directly through
 * set_next_ktime(); all others get a delta clamped to
 * [min_delta_ns, max_delta_ns] and converted to device ticks.
 *
 * Returns 0 on success, -ETIME when the event is in the past.
 */
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
bool force)
{
unsigned long long clc;
int64_t delta;
int rc;
if (unlikely(expires.tv64 < 0)) {
WARN_ON_ONCE(1);
return -ETIME;
}
dev->next_event = expires;
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
/* Shortcut for clockevent devices that can deal with ktime. */
if (dev->features & CLOCK_EVT_FEAT_KTIME)
return dev->set_next_ktime(expires, dev);
delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
/* Already expired: either force the minimum delay or report -ETIME */
if (delta <= 0)
return force ? clockevents_program_min_delta(dev) : -ETIME;
/* Clamp to the range the device supports */
delta = min(delta, (int64_t) dev->max_delta_ns);
delta = max(delta, (int64_t) dev->min_delta_ns);
/* Scaled math: nanoseconds -> device ticks */
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
/* NOTE(review): vendor (Exynos) logging/trace hooks, defined elsewhere */
exynos_ss_clockevent(clc, delta, &dev->next_event);
trace_exynos_clockevent(clc, delta, &dev->next_event);
rc = dev->set_next_event((unsigned long) clc, dev);
/* On failure fall back to the minimum delay only when forced */
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
/*
 * Called after a notify add to make devices available which were
 * released from the notifier call.
 *
 * Drains clockevents_released: each device is moved from the head of
 * that list onto clockevent_devices and offered to the tick layer via
 * tick_check_new_device().
 */
static void clockevents_notify_released(void)
{
struct clock_event_device *dev;
while (!list_empty(&clockevents_released)) {
dev = list_entry(clockevents_released.next,
struct clock_event_device, list);
list_del(&dev->list);
list_add(&dev->list, &clockevent_devices);
tick_check_new_device(dev);
}
}
/*
* Try to install a replacement clock event device
*/
static int clockevents_replace(struct clock_event_device *ced)
{
struct clock_event_device *dev, *newdev = NULL;
list_for_each_entry(dev, &clockevent_devices, list) {
if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
continue;
if (!tick_check_replacement(newdev, dev))
continue;
if (!try_module_get(dev->owner))
continue;
if (newdev)
module_put(newdev->owner);
newdev = dev;
}
if (newdev) {
tick_install_replacement(newdev);
list_del_init(&ced->list);
}
return newdev ? 0 : -EBUSY;
}
/*
 * Called with clockevents_mutex and clockevents_lock held
 *
 * Returns 0 when @ced was unused and has been unlinked, -EAGAIN when it
 * is the tick device of @cpu, -EBUSY for any other device in use.
 */
static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
{
	if (ced->mode != CLOCK_EVT_MODE_UNUSED) {
		if (ced == per_cpu(tick_cpu_device, cpu).evtdev)
			return -EAGAIN;
		return -EBUSY;
	}

	/* Fast track. Device is unused */
	list_del_init(&ced->list);
	return 0;
}
/*
 * SMP function call to unbind a device
 *
 * @arg points to a struct ce_unbind; the outcome is stored in its res
 * member. When the plain unbind reports -EAGAIN a replacement device is
 * looked for instead.
 */
static void __clockevents_unbind(void *arg)
{
	struct ce_unbind *req = arg;
	int rc;

	raw_spin_lock(&clockevents_lock);
	rc = __clockevents_try_unbind(req->ce, smp_processor_id());
	req->res = (rc == -EAGAIN) ? clockevents_replace(req->ce) : rc;
	raw_spin_unlock(&clockevents_lock);
}
/*
 * Issues smp function call to unbind a per cpu device. Called with
 * clockevents_mutex held.
 *
 * The result starts out as -ENODEV and is overwritten by
 * __clockevents_unbind() running on @cpu; the final argument 1 makes
 * smp_call_function_single() wait for completion.
 */
static int clockevents_unbind(struct clock_event_device *ced, int cpu)
{
struct ce_unbind cu = { .ce = ced, .res = -ENODEV };
smp_call_function_single(cpu, __clockevents_unbind, &cu, 1);
return cu.res;
}
/**
 * clockevents_unbind_device - Unbind a clockevents device.
 * @ced:	device to unbind
 * @cpu:	CPU on which the unbind is carried out
 *
 * Serializes against other users with clockevents_mutex and delegates
 * to clockevents_unbind().
 *
 * Returns the result of clockevents_unbind().
 */
int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
{
	int ret;

	mutex_lock(&clockevents_mutex);
	ret = clockevents_unbind(ced, cpu);
	mutex_unlock(&clockevents_mutex);
	return ret;
}
/* Export the public entry point, not the static helper clockevents_unbind() */
EXPORT_SYMBOL_GPL(clockevents_unbind_device);
/**
 * clockevents_register_device - register a clock event device
 * @dev:	device to register
 *
 * The device must be in CLOCK_EVT_MODE_UNUSED (BUG otherwise). A device
 * without a cpumask gets the mask of the current CPU, with a warning on
 * systems that have more than one possible CPU.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
}
raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
tick_check_new_device(dev);
/* Re-offer devices that were released while handling the new one */
clockevents_notify_released();
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
/**
 * clockevents_config - compute mult/shift and delta limits for a device
 * @dev:	device to configure
 * @freq:	device frequency used for the conversion
 *
 * Only acts on devices with CLOCK_EVT_FEAT_ONESHOT. Derives the
 * conversion range from max_delta_ticks / @freq, calls
 * clockevents_calc_mult_shift() and then fills in min_delta_ns and
 * max_delta_ns from the tick limits.
 *
 * NOTE(review): @freq is used as a divisor without a zero check here.
 */
void clockevents_config(struct clock_event_device *dev, u32 freq)
{
u64 sec;
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return;
/*
 * Calculate the maximum number of seconds we can sleep. Limit
 * to 10 minutes for hardware which can program more than
 * 32bit ticks so we still get reasonable conversion values.
 */
sec = dev->max_delta_ticks;
do_div(sec, freq);
if (!sec)
sec = 1;
else if (sec > 600 && dev->max_delta_ticks > UINT_MAX)
sec = 600;
clockevents_calc_mult_shift(dev, freq, sec);
dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false);
dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);
}
/**
 * clockevents_config_and_register - Configure and register a clock event device
 * @dev:	device to register
 * @freq:	The clock frequency
 * @min_delta:	The minimum clock ticks to program in oneshot mode
 * @max_delta:	The maximum clock ticks to program in oneshot mode
 *
 * Stores the tick limits in @dev, then runs clockevents_config() followed
 * by clockevents_register_device().
 *
 * min/max_delta can be 0 for devices which do not support oneshot mode.
 */
void clockevents_config_and_register(struct clock_event_device *dev,
u32 freq, unsigned long min_delta,
unsigned long max_delta)
{
dev->min_delta_ticks = min_delta;
dev->max_delta_ticks = max_delta;
clockevents_config(dev, freq);
clockevents_register_device(dev);
}
EXPORT_SYMBOL_GPL(clockevents_config_and_register);
/*
 * Reconfigure @dev for @freq and re-arm it according to its current mode:
 * a oneshot device is reprogrammed for its pending next_event (without
 * forcing), a periodic device has its periodic mode set again. Any other
 * mode only gets the new configuration.
 *
 * Returns the result of clockevents_program_event() for oneshot devices,
 * 0 otherwise.
 */
int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
	clockevents_config(dev, freq);

	switch (dev->mode) {
	case CLOCK_EVT_MODE_ONESHOT:
		return clockevents_program_event(dev, dev->next_event, false);
	case CLOCK_EVT_MODE_PERIODIC:
		dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
		break;
	default:
		break;
	}
	return 0;
}
/**
 * clockevents_update_freq - Update frequency and reprogram a clock event device.
 * @dev:	device to modify
 * @freq:	new device frequency
 *
 * Reconfigure and reprogram a clock event device in oneshot
 * mode. Must be called on the cpu for which the device delivers per
 * cpu timer events. If called for the broadcast device the core takes
 * care of serialization.
 *
 * Runs with local interrupts disabled. tick_broadcast_update_freq() is
 * tried first; only when it reports -ENODEV is the device updated
 * directly through __clockevents_update_freq().
 *
 * Returns 0 on success, -ETIME when the event is in the past.
 */
int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
unsigned long flags;
int ret;
local_irq_save(flags);
ret = tick_broadcast_update_freq(dev, freq);
if (ret == -ENODEV)
ret = __clockevents_update_freq(dev, freq);
local_irq_restore(flags);
return ret;
}
/*
 * Noop handler when we shut down an event device.
 * Intentionally empty: @dev is ignored.
 */
void clockevents_handle_noop(struct clock_event_device *dev)
{
}
/**
 * clockevents_exchange_device - release and request clock devices
 * @old:	device to release (can be NULL)
 * @new:	device to request (can be NULL)
 *
 * Called from the notifier chain. clockevents_lock is held already.
 * Runs with local interrupts disabled.
 */
void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new)
{
unsigned long flags;
local_irq_save(flags);
/*
 * Caller releases a clock event device. We queue it into the
 * released list and do a notify add later.
 */
if (old) {
module_put(old->owner);
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
}
/* A requested device must be unused; it is handed over shut down */
if (new) {
BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
clockevents_shutdown(new);
}
local_irq_restore(flags);
}
/**
* clockevents_suspend - suspend clock devices
*/
void clockevents_suspend(void)
{
struct clock_event_device *dev;
list_for_each_entry_reverse(dev, &clockevent_devices, list)
if (dev->suspend)
dev->suspend(dev);
}
/**
* clockevents_resume - resume clock devices
*/
void clockevents_resume(void)
{
struct clock_event_device *dev;
list_for_each_entry(dev, &clockevent_devices, list)
if (dev->resume)
dev->resume(dev);
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
/**
 * clockevents_notify - notification about relevant events
 * @reason:	one of the CLOCK_EVT_NOTIFY_* codes handled below
 * @arg:	reason specific argument; for CLOCK_EVT_NOTIFY_CPU_DEAD it is
 *		dereferenced as a pointer to an int CPU number
 *
 * Dispatches to the tick/broadcast layer with clockevents_lock held and
 * interrupts disabled. Unknown reasons are ignored.
 *
 * Returns 0 on success, any other value on error. Only the
 * BROADCAST_ENTER/EXIT path can produce a non-zero value here.
 */
int clockevents_notify(unsigned long reason, void *arg)
{
struct clock_event_device *dev, *tmp;
unsigned long flags;
int cpu, ret = 0;
raw_spin_lock_irqsave(&clockevents_lock, flags);
switch (reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
tick_broadcast_on_off(reason, arg);
break;
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
ret = tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DYING:
tick_handover_do_timer(arg);
break;
case CLOCK_EVT_NOTIFY_SUSPEND:
tick_suspend();
tick_suspend_broadcast();
break;
case CLOCK_EVT_NOTIFY_RESUME:
tick_resume();
break;
case CLOCK_EVT_NOTIFY_CPU_DEAD:
tick_shutdown_broadcast_oneshot(arg);
tick_shutdown_broadcast(arg);
tick_shutdown(arg);
/*
 * Unregister the clock event devices which were
 * released from the users in the notify chain.
 */
list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
list_del(&dev->list);
/*
 * Now check whether the CPU has left unused per cpu devices.
 * A device qualifies when its cpumask contains only this CPU
 * and it is not the broadcast device; it must be unused.
 */
cpu = *((int *)arg);
list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
if (cpumask_test_cpu(cpu, dev->cpumask) &&
cpumask_weight(dev->cpumask) == 1 &&
!tick_is_broadcast_device(dev)) {
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
list_del(&dev->list);
}
}
break;
default:
break;
}
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#ifdef CONFIG_SYSFS
/* Bus type under which the clockevents sysfs devices are registered */
struct bus_type clockevents_subsys = {
.name = "clockevents",
.dev_name = "clockevent",
};
/* One sysfs device per CPU, set up in tick_init_sysfs() */
static DEFINE_PER_CPU(struct device, tick_percpu_dev);
/* Forward declaration; the definition depends on the broadcast config */
static struct tick_device *tick_get_tick_dev(struct device *dev);
/*
 * sysfs show handler for "current_device": writes the name of the clock
 * event device currently bound to the tick device behind @dev, followed
 * by a newline. Returns the number of bytes written, or 0 when no
 * device is bound. Reads the binding under clockevents_lock.
 */
static ssize_t sysfs_show_current_tick_dev(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct tick_device *td;
ssize_t count = 0;
raw_spin_lock_irq(&clockevents_lock);
td = tick_get_tick_dev(dev);
if (td && td->evtdev)
count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name);
raw_spin_unlock_irq(&clockevents_lock);
return count;
}
static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
/*
 * sysfs store handler for "unbind_device": unbinds the clock event
 * device whose name was written to the attribute.
 *
 * Returns @count on success, a negative error from sysfs_get_uname(),
 * -ENODEV when no registered device has that name, or the error from
 * the unbind attempt.
 *
 * We don't support the abomination of removable broadcast devices
 */
static ssize_t sysfs_unbind_tick_dev(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
char name[CS_NAME_LEN];
ssize_t ret = sysfs_get_uname(buf, name, count);
struct clock_event_device *ce;
if (ret < 0)
return ret;
ret = -ENODEV;
mutex_lock(&clockevents_mutex);
raw_spin_lock_irq(&clockevents_lock);
list_for_each_entry(ce, &clockevent_devices, list) {
if (!strcmp(ce->name, name)) {
ret = __clockevents_try_unbind(ce, dev->id);
break;
}
}
raw_spin_unlock_irq(&clockevents_lock);
/*
 * We hold clockevents_mutex, so ce can't go away.
 * -EAGAIN can only come from a matched device, so ce is valid here.
 */
if (ret == -EAGAIN)
ret = clockevents_unbind(ce, dev->id);
mutex_unlock(&clockevents_mutex);
return ret ? ret : count;
}
static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/* sysfs device representing the broadcast tick device */
static struct device tick_bc_dev = {
.init_name = "broadcast",
.id = 0,
.bus = &clockevents_subsys,
};
/*
 * Map a sysfs device to its tick device: the broadcast sysfs device maps
 * to the broadcast tick device, everything else to the per cpu tick
 * device selected by dev->id.
 */
static struct tick_device *tick_get_tick_dev(struct device *dev)
{
	if (dev == &tick_bc_dev)
		return tick_get_broadcast_device();

	return &per_cpu(tick_cpu_device, dev->id);
}
/*
 * Register the broadcast sysfs device and add its "current_device"
 * attribute. Returns 0 on success or the first error encountered.
 */
static __init int tick_broadcast_init_sysfs(void)
{
	int err;

	err = device_register(&tick_bc_dev);
	if (err)
		return err;

	return device_create_file(&tick_bc_dev, &dev_attr_current_device);
}
#else
/*
 * Without broadcast support every sysfs device maps to the per cpu tick
 * device selected by dev->id.
 */
static struct tick_device *tick_get_tick_dev(struct device *dev)
{
return &per_cpu(tick_cpu_device, dev->id);
}
/* Stub used when broadcast support is not built: nothing to register */
static inline int tick_broadcast_init_sysfs(void) { return 0; }
#endif
/*
 * Register one sysfs device per possible CPU on the clockevents bus and
 * create its "current_device" and "unbind_device" attributes, then set
 * up the broadcast device. Stops at and returns the first error.
 */
static int __init tick_init_sysfs(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct device *dev = &per_cpu(tick_percpu_dev, cpu);
int err;
dev->id = cpu;
dev->bus = &clockevents_subsys;
err = device_register(dev);
if (!err)
err = device_create_file(dev, &dev_attr_current_device);
if (!err)
err = device_create_file(dev, &dev_attr_unbind_device);
if (err)
return err;
}
return tick_broadcast_init_sysfs();
}
/*
 * Initcall: register the clockevents subsystem and, if that worked,
 * populate it with the tick devices. Returns 0 or the first error.
 */
static int __init clockevents_init_sysfs(void)
{
	int err;

	err = subsys_system_register(&clockevents_subsys, NULL);
	if (err)
		return err;

	return tick_init_sysfs();
}
device_initcall(clockevents_init_sysfs);
#endif /* SYSFS */
#endif /* GENERIC_CLOCK_EVENTS */

1109
kernel/time/clocksource.c Normal file

File diff suppressed because it is too large Load diff

1868
kernel/time/hrtimer.c Normal file

File diff suppressed because it is too large Load diff

301
kernel/time/itimer.c Normal file
View file

@ -0,0 +1,301 @@
/*
* linux/kernel/itimer.c
*
* Copyright (C) 1992 Darren Senn
*/
/* These are all the functions necessary to implement itimers */
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
#include <trace/events/timer.h>
#include <asm/uaccess.h>
/**
 * itimer_get_remtime - get remaining time for the timer
 *
 * @timer: the timer to read
 *
 * Returns the delta between the expiry time and now. For an active
 * timer whose remaining time is already zero or negative (pending
 * expiry) 1 usec is reported; for an inactive timer 0 is reported.
 */
static struct timeval itimer_get_remtime(struct hrtimer *timer)
{
ktime_t rem = hrtimer_get_remaining(timer);
/*
 * Racy but safe: if the itimer expires after the above
 * hrtimer_get_remaining() call but before this condition
 * then we return 0 - which is correct.
 */
if (hrtimer_active(timer)) {
if (rem.tv64 <= 0)
rem.tv64 = NSEC_PER_USEC;
} else
rem.tv64 = 0;
return ktime_to_timeval(rem);
}
/*
 * Read the CPU-time based interval timer @clock_id of @tsk into @value.
 *
 * it_value is the time left until expiry, measured against the thread
 * group's utime (CPUCLOCK_VIRT) or utime + stime (CPUCLOCK_PROF);
 * it_interval is the reload increment. A disarmed timer (expires == 0)
 * reports an it_value of zero. State is sampled under siglock.
 */
static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
struct itimerval *const value)
{
cputime_t cval, cinterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
spin_lock_irq(&tsk->sighand->siglock);
cval = it->expires;
cinterval = it->incr;
if (cval) {
struct task_cputime cputime;
cputime_t t;
thread_group_cputimer(tsk, &cputime);
if (clock_id == CPUCLOCK_PROF)
t = cputime.utime + cputime.stime;
else
/* CPUCLOCK_VIRT */
t = cputime.utime;
if (cval < t)
/* about to fire */
cval = cputime_one_jiffy;
else
cval = cval - t;
}
spin_unlock_irq(&tsk->sighand->siglock);
/* Convert to timevals outside the lock */
cputime_to_timeval(cval, &value->it_value);
cputime_to_timeval(cinterval, &value->it_interval);
}
/*
 * Fetch the current setting of interval timer @which for the calling
 * task into @value.
 *
 * Returns 0 on success, -EINVAL when @which is not one of ITIMER_REAL,
 * ITIMER_VIRTUAL or ITIMER_PROF.
 */
int do_getitimer(int which, struct itimerval *value)
{
	struct task_struct *tsk = current;

	if (which == ITIMER_REAL) {
		/* The real timer state is protected by siglock */
		spin_lock_irq(&tsk->sighand->siglock);
		value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
		value->it_interval =
			ktime_to_timeval(tsk->signal->it_real_incr);
		spin_unlock_irq(&tsk->sighand->siglock);
	} else if (which == ITIMER_VIRTUAL) {
		get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
	} else if (which == ITIMER_PROF) {
		get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
	} else {
		return -EINVAL;
	}

	return 0;
}
/*
 * getitimer() system call: copy the current setting of timer @which to
 * the user buffer @value.
 *
 * Returns 0 on success, -EFAULT for a NULL or unwritable @value, or the
 * error reported by do_getitimer().
 */
SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
{
	struct itimerval get_buffer;
	int error;

	if (!value)
		return -EFAULT;

	error = do_getitimer(which, &get_buffer);
	if (error)
		return error;

	if (copy_to_user(value, &get_buffer, sizeof(get_buffer)))
		return -EFAULT;

	return 0;
}
/*
 * hrtimer callback of the ITIMER_REAL timer: traces the expiry and sends
 * SIGALRM to the leader pid of the owning signal_struct.
 *
 * The timer is automagically restarted, when interval != 0.
 * NOTE(review): this handler itself always returns HRTIMER_NORESTART, so
 * any such restart must happen outside this function - confirm where.
 */
enum hrtimer_restart it_real_fn(struct hrtimer *timer)
{
struct signal_struct *sig =
container_of(timer, struct signal_struct, real_timer);
trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
return HRTIMER_NORESTART;
}
/*
 * Return by how many nanoseconds the cputime value @ct exceeds @real_ns,
 * or 0 when it does not exceed it. The difference is truncated to u32.
 */
static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
{
	struct timespec ts;
	s64 cpu_ns;

	cputime_to_timespec(ct, &ts);
	cpu_ns = timespec_to_ns(&ts);

	if (cpu_ns > real_ns)
		return cpu_ns - real_ns;

	return 0;
}
/*
 * Arm, re-arm or disarm the CPU-time based interval timer @clock_id of
 * @tsk from @value, optionally returning the previous setting in
 * @ovalue (may be NULL).
 *
 * The error/incr_error fields record how much the cputime representation
 * of the requested value/interval exceeds the exact nanosecond request
 * (see cputime_sub_ns()).
 */
static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
const struct itimerval *const value,
struct itimerval *const ovalue)
{
cputime_t cval, nval, cinterval, ninterval;
s64 ns_ninterval, ns_nval;
u32 error, incr_error;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
/* Convert the request both to cputime and to exact nanoseconds */
nval = timeval_to_cputime(&value->it_value);
ns_nval = timeval_to_ns(&value->it_value);
ninterval = timeval_to_cputime(&value->it_interval);
ns_ninterval = timeval_to_ns(&value->it_interval);
error = cputime_sub_ns(nval, ns_nval);
incr_error = cputime_sub_ns(ninterval, ns_ninterval);
spin_lock_irq(&tsk->sighand->siglock);
/* Remember the old setting for @ovalue */
cval = it->expires;
cinterval = it->incr;
/* Only touch the process timer when one was or will be armed */
if (cval || nval) {
if (nval > 0)
nval += cputime_one_jiffy;
set_process_cpu_timer(tsk, clock_id, &nval, &cval);
}
it->expires = nval;
it->incr = ninterval;
it->error = error;
it->incr_error = incr_error;
trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
spin_unlock_irq(&tsk->sighand->siglock);
if (ovalue) {
cputime_to_timeval(cval, &ovalue->it_value);
cputime_to_timeval(cinterval, &ovalue->it_interval);
}
}
/*
 * Returns true if the timeval is in canonical form: tv_sec is not
 * negative and tv_usec lies in [0, USEC_PER_SEC). The unsigned cast
 * makes a negative tv_usec fail the upper bound check.
 * Note: @t is evaluated twice.
 */
#define timeval_valid(t) \
(((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
/*
 * Set interval timer @which of the calling task from @value and, when
 * @ovalue is not NULL, report the previous setting through it.
 *
 * Returns 0 on success, -EINVAL when a timeval in @value is not in
 * canonical form or @which is not a known timer.
 */
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
struct task_struct *tsk = current;
struct hrtimer *timer;
ktime_t expires;
/*
 * Validate the timevals in value.
 */
if (!timeval_valid(&value->it_value) ||
!timeval_valid(&value->it_interval))
return -EINVAL;
switch (which) {
case ITIMER_REAL:
again:
spin_lock_irq(&tsk->sighand->siglock);
timer = &tsk->signal->real_timer;
if (ovalue) {
ovalue->it_value = itimer_get_remtime(timer);
ovalue->it_interval
= ktime_to_timeval(tsk->signal->it_real_incr);
}
/* We are sharing ->siglock with it_real_fn() */
/* Cancel failed: drop the lock and retry from the top */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
goto again;
}
expires = timeval_to_ktime(value->it_value);
/* A zero it_value disarms the timer and clears the interval */
if (expires.tv64 != 0) {
tsk->signal->it_real_incr =
timeval_to_ktime(value->it_interval);
hrtimer_start(timer, expires, HRTIMER_MODE_REL);
} else
tsk->signal->it_real_incr.tv64 = 0;
trace_itimer_state(ITIMER_REAL, value, 0);
spin_unlock_irq(&tsk->sighand->siglock);
break;
case ITIMER_VIRTUAL:
set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
break;
case ITIMER_PROF:
set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
break;
default:
return -EINVAL;
}
return 0;
}
/**
 * alarm_setitimer - set alarm in seconds
 *
 * @seconds:	number of seconds until alarm
 *		0 disables the alarm
 *
 * Returns the remaining time in seconds of a pending timer or 0 when
 * the timer is not active.
 *
 * When BITS_PER_LONG < 64 the seconds value is clamped to INT_MAX to
 * avoid negative timeval settings which would cause immediate expiry.
 */
unsigned int alarm_setitimer(unsigned int seconds)
{
struct itimerval it_new, it_old;
#if BITS_PER_LONG < 64
if (seconds > INT_MAX)
seconds = INT_MAX;
#endif
it_new.it_value.tv_sec = seconds;
it_new.it_value.tv_usec = 0;
it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
do_setitimer(ITIMER_REAL, &it_new, &it_old);
/*
 * We can't return 0 if we have an alarm pending ... And we'd
 * better return too much than too little anyway.
 * So round up: any sub-second remainder of less than one second
 * total counts as 1, and half a second or more rounds up.
 */
if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
it_old.it_value.tv_usec >= 500000)
it_old.it_value.tv_sec++;
return it_old.it_value.tv_sec;
}
/*
 * setitimer() system call: program timer @which from the user buffer
 * @value and, when @ovalue is not NULL, copy the previous setting back
 * to user space.
 *
 * A NULL @value is treated as an all-zero setting (with a one-time
 * warning). Returns 0 on success, -EFAULT when a user buffer cannot be
 * accessed, or the error reported by do_setitimer().
 */
SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
		struct itimerval __user *, ovalue)
{
	struct itimerval set_buffer, get_buffer;
	int error;

	if (!value) {
		memset(&set_buffer, 0, sizeof(set_buffer));
		printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
			    " Misfeature support will be removed\n",
			    current->comm);
	} else if (copy_from_user(&set_buffer, value, sizeof(set_buffer))) {
		return -EFAULT;
	}

	error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);

	/* Hand the old setting back only on success and when asked for */
	if (!error && ovalue &&
	    copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
		error = -EFAULT;

	return error;
}

135
kernel/time/jiffies.c Normal file
View file

@ -0,0 +1,135 @@
/***********************************************************************
* linux/kernel/time/jiffies.c
*
* This file contains the jiffies based clocksource.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/init.h>
#include "tick-internal.h"
/* The Jiffies based clocksource is the lowest common
 * denominator clock source which should function on
 * all systems. It has the same coarse resolution as
 * the timer interrupt frequency HZ and it suffers
 * inaccuracies caused by missed or lost timer
 * interrupts and the inability of the timer
 * interrupt hardware to accurately tick at the
 * requested HZ value. It is also not recommended
 * for "tick-less" systems.
 */
#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
 * conversion, the .shift value could be zero. However
 * this would make NTP adjustments impossible as they are
 * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
 * shift both the numerator and denominator the same
 * amount, and give ntp adjustments in units of 1/2^8
 *
 * The value 8 is somewhat carefully chosen, as anything
 * larger can result in overflows. NSEC_PER_JIFFY grows as
 * HZ shrinks, so values greater than 8 overflow 32bits when
 * HZ=100.
 */
#if HZ < 34
#define JIFFIES_SHIFT 6
#elif HZ < 67
#define JIFFIES_SHIFT 7
#else
#define JIFFIES_SHIFT 8
#endif
/* clocksource read callback: returns the current jiffies value; @cs is unused */
static cycle_t jiffies_read(struct clocksource *cs)
{
return (cycle_t) jiffies;
}
/*
 * The jiffies clocksource: one cycle per jiffy, converted to nanoseconds
 * with mult = NSEC_PER_JIFFY << JIFFIES_SHIFT and shift = JIFFIES_SHIFT.
 */
static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
#if (BITS_PER_LONG < 64)
/*
 * Read jiffies_64 consistently when BITS_PER_LONG < 64: the value is
 * sampled inside a jiffies_lock seqlock read section and the read is
 * repeated until no writer interfered.
 */
u64 get_jiffies_64(void)
{
unsigned long seq;
u64 ret;
do {
seq = read_seqbegin(&jiffies_lock);
ret = jiffies_64;
} while (read_seqretry(&jiffies_lock, seq));
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
#endif
EXPORT_SYMBOL(jiffies);
/* Core initcall: register the jiffies clocksource and return the result */
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);
/*
 * Default clocksource: the jiffies clocksource. Declared __weak, so
 * another definition of this symbol can override it at link time.
 */
struct clocksource * __init __weak clocksource_default_clock(void)
{
return &clocksource_jiffies;
}
struct clocksource refined_jiffies;
/*
 * Register "refined-jiffies": a copy of the jiffies clocksource with a
 * rating one higher and a mult recomputed from the actual tick source
 * frequency @cycles_per_second.
 *
 * Always returns 0; the result of clocksource_register() is not checked.
 * NOTE(review): @cycles_per_second values that make cycles_per_tick or
 * shift_hz zero would divide by zero - confirm callers never pass them.
 */
int register_refined_jiffies(long cycles_per_second)
{
u64 nsec_per_tick, shift_hz;
long cycles_per_tick;
refined_jiffies = clocksource_jiffies;
refined_jiffies.name = "refined-jiffies";
refined_jiffies.rating++;
/* Calc cycles per tick (rounded to nearest) */
cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
/* shift_hz stores hz<<8 for extra accuracy */
shift_hz = (u64)cycles_per_second << 8;
shift_hz += cycles_per_tick/2;
do_div(shift_hz, cycles_per_tick);
/* Calculate nsec_per_tick using shift_hz */
nsec_per_tick = (u64)NSEC_PER_SEC << 8;
nsec_per_tick += (u32)shift_hz/2;
do_div(nsec_per_tick, (u32)shift_hz);
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
clocksource_register(&refined_jiffies);
return 0;
}

955
kernel/time/ntp.c Normal file
View file

@ -0,0 +1,955 @@
/*
* NTP state machine interfaces and logic.
*
* This code was mainly moved from kernel/timer.c and kernel/time.c
* Please see those files for relevant copyright info and historical
* changelogs.
*/
#include <linux/capability.h>
#include <linux/clocksource.h>
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rtc.h>
#include "tick-internal.h"
#include "ntp_internal.h"
/*
* NTP timekeeping variables:
*
* Note: All of the NTP state is protected by the timekeeping locks.
*/
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
/* SHIFTED_HZ period (nsecs): */
unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
/*
* phase-lock loop variables
*/
/*
* clock synchronization status
*
* (TIME_ERROR prevents overwriting the CMOS clock)
*/
static int time_state = TIME_OK;
/* clock status bits: */
static int time_status = STA_UNSYNC;
/* time adjustment (nsecs): */
static s64 time_offset;
/* pll time constant: */
static long time_constant = 2;
/* maximum error (usecs): */
static long time_maxerror = NTP_PHASE_LIMIT;
/* estimated error (usecs): */
static long time_esterror = NTP_PHASE_LIMIT;
/* frequency offset (scaled nsecs/secs): */
static s64 time_freq;
/* time at last adjustment (secs): */
static long time_reftime;
static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
#ifdef CONFIG_NTP_PPS
/*
* The following variables are used when a pulse-per-second (PPS) signal
* is available. They establish the engineering parameters of the clock
* discipline loop when controlled by the PPS signal.
*/
#define PPS_VALID 10 /* PPS signal watchdog max (s) */
#define PPS_POPCORN 4 /* popcorn spike threshold (shift) */
#define PPS_INTMIN 2 /* min freq interval (s) (shift) */
#define PPS_INTMAX 8 /* max freq interval (s) (shift) */
#define PPS_INTCOUNT 4 /* number of consecutive good intervals to
increase pps_shift or consecutive bad
intervals to decrease it */
#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
static int pps_valid; /* signal watchdog counter */
static long pps_tf[3]; /* phase median filter */
static long pps_jitter; /* current jitter (ns) */
static struct timespec pps_fbase; /* beginning of the last freq interval */
static int pps_shift; /* current interval duration (s) (shift) */
static int pps_intcnt; /* interval counter */
static s64 pps_freq; /* frequency offset (scaled ns/s) */
static long pps_stabil; /* current stability (scaled ns/s) */
/*
* PPS signal quality monitors
*/
static long pps_calcnt; /* calibration intervals */
static long pps_jitcnt; /* jitter limit exceeded */
static long pps_stbcnt; /* stability limit exceeded */
static long pps_errcnt; /* calibration errors */
/*
 * PPS kernel consumer compensates the whole phase error immediately.
 * Otherwise, reduce the offset by a fixed factor times the time constant.
 */
static inline s64 ntp_offset_chunk(s64 offset)
{
	/* Without both PPS time sync and a PPS signal, apply only a fraction */
	if (!(time_status & STA_PPSTIME) || !(time_status & STA_PPSSIGNAL))
		return shift_right(offset, SHIFT_PLL + time_constant);

	return offset;
}
/*
 * Restart PPS frequency calibration: the interval shift goes back to
 * PPS_INTMIN and the interval counter is cleared.
 */
static inline void pps_reset_freq_interval(void)
{
/* the PPS calibration interval may end
surprisingly early */
pps_shift = PPS_INTMIN;
pps_intcnt = 0;
}
/**
 * pps_clear - Clears the PPS state variables
 *
 * Resets the frequency interval, zeroes the phase median filter, the
 * start of the last frequency interval and the PPS frequency offset.
 */
static inline void pps_clear(void)
{
pps_reset_freq_interval();
pps_tf[0] = 0;
pps_tf[1] = 0;
pps_tf[2] = 0;
pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
pps_freq = 0;
}
/*
 * Decrease pps_valid to indicate that another second has passed since
 * the last PPS signal. When it reaches 0, indicate that PPS signal is
 * missing.
 */
static inline void pps_dec_valid(void)
{
	if (pps_valid > 0) {
		pps_valid--;
		return;
	}

	/* Watchdog expired: drop the PPS status bits and reset PPS state */
	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			 STA_PPSWANDER | STA_PPSERROR);
	pps_clear();
}
/* Store @freq as the PPS frequency offset */
static inline void pps_set_freq(s64 freq)
{
pps_freq = freq;
}
/*
 * Returns non-zero when @status describes an error condition: the clock
 * is unsynchronized or has a clock error, or one of the PPS related
 * conditions listed below holds.
 */
static inline int is_error_status(int status)
{
return (status & (STA_UNSYNC|STA_CLOCKERR))
/* PPS signal lost when either PPS time or
 * PPS frequency synchronization requested
 */
|| ((status & (STA_PPSFREQ|STA_PPSTIME))
&& !(status & STA_PPSSIGNAL))
/* PPS jitter exceeded when
 * PPS time synchronization requested */
|| ((status & (STA_PPSTIME|STA_PPSJITTER))
== (STA_PPSTIME|STA_PPSJITTER))
/* PPS wander exceeded or calibration error when
 * PPS frequency synchronization requested
 */
|| ((status & STA_PPSFREQ)
&& (status & (STA_PPSWANDER|STA_PPSERROR)));
}
/*
 * Copy the PPS state into the PPS fields of @txc. The jitter is
 * converted from nanoseconds to microseconds unless STA_NANO is set.
 */
static inline void pps_fill_timex(struct timex *txc)
{
txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->jitter = pps_jitter;
if (!(time_status & STA_NANO))
txc->jitter /= NSEC_PER_USEC;
txc->shift = pps_shift;
txc->stabil = pps_stabil;
txc->jitcnt = pps_jitcnt;
txc->calcnt = pps_calcnt;
txc->errcnt = pps_errcnt;
txc->stbcnt = pps_stbcnt;
}
#else /* !CONFIG_NTP_PPS */
/*
 * Without CONFIG_NTP_PPS the offset is always reduced by the
 * (SHIFT_PLL + time_constant) shift.
 */
static inline s64 ntp_offset_chunk(s64 offset)
{
return shift_right(offset, SHIFT_PLL + time_constant);
}
/* No-op stubs used when CONFIG_NTP_PPS is not set */
static inline void pps_reset_freq_interval(void) {}
static inline void pps_clear(void) {}
static inline void pps_dec_valid(void) {}
static inline void pps_set_freq(s64 freq) {}
/*
 * Without CONFIG_NTP_PPS only STA_UNSYNC and STA_CLOCKERR count as
 * errors. Returns the masked bits, i.e. non-zero on error.
 */
static inline int is_error_status(int status)
{
return status & (STA_UNSYNC|STA_CLOCKERR);
}
/* Without CONFIG_NTP_PPS all PPS fields of @txc are reported as zero */
static inline void pps_fill_timex(struct timex *txc)
{
/* PPS is not implemented, so these are zero */
txc->ppsfreq = 0;
txc->jitter = 0;
txc->shift = 0;
txc->stabil = 0;
txc->jitcnt = 0;
txc->calcnt = 0;
txc->errcnt = 0;
txc->stbcnt = 0;
}
#endif /* CONFIG_NTP_PPS */
/**
 * ntp_synced - Returns 1 if the NTP status is not UNSYNC
 *
 * Returns 0 when STA_UNSYNC is set in time_status.
 */
static inline int ntp_synced(void)
{
return !(time_status & STA_UNSYNC);
}
/*
* NTP methods:
*/
/*
 * Update (tick_length, tick_length_base, tick_nsec), based
 * on (tick_usec, ntp_tick_adj, time_freq):
 */
static void ntp_update_frequency(void)
{
u64 second_length;
u64 new_base;
/* Length of one second in ns, scaled up by NTP_SCALE_SHIFT. */
second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
<< NTP_SCALE_SHIFT;
/* Fold in the boot-time tick adjustment and the frequency offset. */
second_length += ntp_tick_adj;
second_length += time_freq;
/* Unscaled nanoseconds per kernel tick (HZ ticks per second). */
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
/* Scaled length of one NTP interval. */
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
/*
 * Don't wait for the next second_overflow, apply
 * the change to the tick length immediately:
 */
tick_length += new_base - tick_length_base;
tick_length_base = new_base;
}
/*
 * Compute the FLL frequency adjustment for @offset64 measured over @secs
 * seconds and record in STA_MODE whether FLL mode is in effect.
 * Returns 0 when FLL is not used for this update.
 */
static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
{
	int use_fll;

	time_status &= ~STA_MODE;

	/*
	 * FLL needs at least MINSEC between updates, and unless STA_FLL
	 * forces it, it only kicks in once the interval exceeds MAXSEC.
	 */
	use_fll = secs >= MINSEC &&
		  ((time_status & STA_FLL) || secs > MAXSEC);
	if (!use_fll)
		return 0;

	time_status |= STA_MODE;

	return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
}
static void ntp_update_offset(long offset)
{
s64 freq_adj;
s64 offset64;
long secs;
if (!(time_status & STA_PLL))
return;
if (!(time_status & STA_NANO))
offset *= NSEC_PER_USEC;
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
offset = min(offset, MAXPHASE);
offset = max(offset, -MAXPHASE);
/*
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
secs = get_seconds() - time_reftime;
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
time_reftime = get_seconds();
offset64 = offset;
freq_adj = ntp_update_offset_fll(offset64, secs);
/*
* Clamp update interval to reduce PLL gain with low
* sampling rate (e.g. intermittent network connection)
* to avoid instability.
*/
if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant)))
secs = 1 << (SHIFT_PLL + 1 + time_constant);
freq_adj += (offset64 * secs) <<
(NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));
freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED);
time_freq = max(freq_adj, -MAXFREQ_SCALED);
time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
}
/**
 * ntp_clear - Reset the NTP state variables to their unsynchronized defaults
 */
void ntp_clear(void)
{
	/* Mark the clock unsynchronized with the largest error estimates. */
	time_status |= STA_UNSYNC;
	time_maxerror = time_esterror = NTP_PHASE_LIMIT;

	/* Stop an active adjtime() and drop any pending phase offset. */
	time_adjust = 0;
	time_offset = 0;

	/* Recompute the base tick length and restart from exactly that. */
	ntp_update_frequency();
	tick_length = tick_length_base;

	/* Clear PPS state variables */
	pps_clear();
}
/* Return the current value of tick_length. */
u64 ntp_tick_length(void)
{
return tick_length;
}
/*
 * this routine handles the overflow of the microsecond field
 *
 * @secs: seconds value, used to detect the UTC day boundary for leap
 *        second handling
 *
 * Besides the leap second state machine, it bumps time_maxerror,
 * recomputes tick_length from tick_length_base, the pending time_offset
 * and any outstanding adjtime() adjustment (time_adjust), and ages the
 * PPS validity counter.
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 * Also handles leap second processing, and returns leap offset:
 * -1 when a leap second is inserted, 1 when one is deleted, 0 otherwise.
 */
int second_overflow(unsigned long secs)
{
s64 delta;
int leap = 0;
/*
 * Leap second processing. If in leap-insert state at the end of the
 * day, the system clock is set back one second; if in leap-delete
 * state, the system clock is set ahead one second.
 */
switch (time_state) {
case TIME_OK:
/* Arm the state machine when an insert/delete has been requested. */
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
/* Request withdrawn: disarm. Otherwise act at the day boundary. */
if (!(time_status & STA_INS))
time_state = TIME_OK;
else if (secs % 86400 == 0) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
}
break;
case TIME_DEL:
/* Deletion happens one second before the day boundary. */
if (!(time_status & STA_DEL))
time_state = TIME_OK;
else if ((secs + 1) % 86400 == 0) {
leap = 1;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
/* The inserted second is over. */
time_state = TIME_WAIT;
break;
case TIME_WAIT:
/* Stay here until both request bits have been cleared. */
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
if (time_maxerror > NTP_PHASE_LIMIT) {
time_maxerror = NTP_PHASE_LIMIT;
time_status |= STA_UNSYNC;
}
/* Compute the phase adjustment for the next second */
tick_length = tick_length_base;
delta = ntp_offset_chunk(time_offset);
time_offset -= delta;
tick_length += delta;
/* Check PPS signal */
pps_dec_valid();
/* No adjtime() adjustment outstanding: done. */
if (!time_adjust)
goto out;
/* Apply at most MAX_TICKADJ per call; the remainder stays pending. */
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
goto out;
}
if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
goto out;
}
/* The remainder fits in one step: apply it all and finish. */
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
out:
return leap;
}
#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
static void sync_cmos_clock(struct work_struct *work);
/* Delayed work item that runs sync_cmos_clock(). */
static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
/*
 * Work handler that writes the system time to the persistent/RTC clock
 * while NTP is synchronized, then re-queues itself.
 * @work: the work item (unused in the body).
 */
static void sync_cmos_clock(struct work_struct *work)
{
struct timespec64 now;
struct timespec next;
/* Stays 1 if this run misses the update window, forcing a quick retry. */
int fail = 1;
/*
 * If we have an externally synchronized Linux clock, then update
 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
 * called as close as possible to 500 ms before the new second starts.
 * This code is run on a timer. If the clock is set, that timer
 * may not expire at the correct time. Thus, we adjust...
 * We want the clock to be within a couple of ticks from the target.
 */
if (!ntp_synced()) {
/*
 * Not synced, exit, do not restart a timer (if one is
 * running, let it run out).
 */
return;
}
getnstimeofday64(&now);
/* Only update when within five ticks of the half-second mark. */
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) {
struct timespec adjust = timespec64_to_timespec(now);
fail = -ENODEV;
/* Shift by the timezone offset if the persistent clock keeps local time. */
if (persistent_clock_is_local)
adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
#ifdef CONFIG_GENERIC_CMOS_UPDATE
fail = update_persistent_clock(adjust);
#endif
#ifdef CONFIG_RTC_SYSTOHC
/* Fall back to the RTC class device if the update above reported -ENODEV. */
if (fail == -ENODEV)
fail = rtc_set_ntp_time(adjust);
#endif
}
/* Aim the next run at the half-second mark, minus half a tick. */
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
if (next.tv_nsec <= 0)
next.tv_nsec += NSEC_PER_SEC;
/* On success or -ENODEV wait 659 more seconds; otherwise retry at once. */
if (!fail || fail == -ENODEV)
next.tv_sec = 659;
else
next.tv_sec = 0;
if (next.tv_nsec >= NSEC_PER_SEC) {
next.tv_sec++;
next.tv_nsec -= NSEC_PER_SEC;
}
queue_delayed_work(system_power_efficient_wq,
&sync_cmos_work, timespec_to_jiffies(&next));
}
/* Queue sync_cmos_work with zero delay on system_power_efficient_wq. */
void ntp_notify_cmos_timer(void)
{
queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
}
#else
/* No CMOS/RTC update support configured: nothing to do. */
void ntp_notify_cmos_timer(void) { }
#endif
/*
 * Propagate a new txc->status value into the NTP state.
 * @txc: user request carrying the new status bits
 * @ts:  unused here
 */
static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
{
	const int want_pll = txc->status & STA_PLL;

	/* PLL is being switched off: fall back to the unsynchronized state. */
	if (!want_pll && (time_status & STA_PLL)) {
		time_state = TIME_OK;
		time_status = STA_UNSYNC;
		/* restart PPS frequency calibration */
		pps_reset_freq_interval();
	}

	/*
	 * PLL is being switched on: the reference time becomes
	 * the current time.
	 */
	if (want_pll && !(time_status & STA_PLL))
		time_reftime = get_seconds();

	/* Keep the read-only bits, take every other bit from the request. */
	time_status = (time_status & STA_RONLY) | (txc->status & ~STA_RONLY);
}
/*
 * Apply every field of @txc selected by txc->modes to the NTP state.
 * @txc:      user request
 * @ts:       passed through to process_adj_status()
 * @time_tai: location updated when ADJ_TAI is requested
 *
 * The mode bits are handled in a fixed order. ADJ_STATUS and
 * ADJ_NANO/ADJ_MICRO come first because later steps test STA_NANO.
 */
static inline void process_adjtimex_modes(struct timex *txc,
struct timespec64 *ts,
s32 *time_tai)
{
if (txc->modes & ADJ_STATUS)
process_adj_status(txc, ts);
if (txc->modes & ADJ_NANO)
time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
time_status &= ~STA_NANO;
if (txc->modes & ADJ_FREQUENCY) {
/* Scale the user value and clamp it to +/-MAXFREQ_SCALED. */
time_freq = txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
/* update pps_freq */
pps_set_freq(time_freq);
}
if (txc->modes & ADJ_MAXERROR)
time_maxerror = txc->maxerror;
if (txc->modes & ADJ_ESTERROR)
time_esterror = txc->esterror;
if (txc->modes & ADJ_TIMECONST) {
time_constant = txc->constant;
/* In microsecond mode the time constant is offset by 4. */
if (!(time_status & STA_NANO))
time_constant += 4;
/* Keep the result within [0, MAXTC]. */
time_constant = min(time_constant, (long)MAXTC);
time_constant = max(time_constant, 0l);
}
/* Only strictly positive TAI offsets are accepted. */
if (txc->modes & ADJ_TAI && txc->constant > 0)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;
/* Any of these changes an input of the tick length computation. */
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
}
/**
 * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
 * @txc: request to validate
 *
 * Returns 0 if the request is acceptable, -EINVAL for malformed values
 * and -EPERM when the caller lacks CAP_SYS_TIME for a modifying request.
 */
int ntp_validate_timex(struct timex *txc)
{
	if (!(txc->modes & ADJ_ADJTIME)) {
		/* In order to modify anything, you gotta be super-user! */
		if (txc->modes && !capable(CAP_SYS_TIME))
			return -EPERM;

		/*
		 * A tick more than 10% away from nominal means
		 * something is VERY wrong with the quartz.
		 */
		if ((txc->modes & ADJ_TICK) &&
		    (txc->tick < 900000/USER_HZ ||
		     txc->tick > 1100000/USER_HZ))
			return -EINVAL;
	} else {
		/* ADJ_ADJTIME is only valid together with the singleshot bit */
		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
			return -EINVAL;

		/* A read-only query needs no privilege; anything else does. */
		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
		    !capable(CAP_SYS_TIME))
			return -EPERM;
	}

	if ((txc->modes & ADJ_SETOFFSET) && !capable(CAP_SYS_TIME))
		return -EPERM;

	/*
	 * Check for potential multiplication overflows that can
	 * only happen on 64-bit systems:
	 */
	if ((BITS_PER_LONG == 64) && (txc->modes & ADJ_FREQUENCY)) {
		if (txc->freq < LLONG_MIN / PPM_SCALE ||
		    txc->freq > LLONG_MAX / PPM_SCALE)
			return -EINVAL;
	}

	return 0;
}
/*
 * adjtimex mainly allows reading (and writing, if superuser) of
 * kernel time-keeping variables. used by xntpd.
 *
 * @txc:      request on input; filled with the current NTP state on output
 * @ts:       time reported back in txc->time
 * @time_tai: TAI offset, read for the reply and possibly updated
 *
 * Returns time_state, or TIME_ERROR if is_error_status() flags the
 * current time_status.
 */
int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
{
int result;
if (txc->modes & ADJ_ADJTIME) {
/* Report the adjustment that was pending before this call. */
long save_adjust = time_adjust;
if (!(txc->modes & ADJ_OFFSET_READONLY)) {
/* adjtime() is independent from ntp_adjtime() */
time_adjust = txc->offset;
ntp_update_frequency();
}
txc->offset = save_adjust;
} else {
/* If there are input parameters, then process them: */
if (txc->modes)
process_adjtimex_modes(txc, ts, time_tai);
/* Unscale the pending offset; microseconds unless STA_NANO. */
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
}
result = time_state; /* mostly `TIME_OK' */
/* check for errors */
if (is_error_status(time_status))
result = TIME_ERROR;
txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
txc->constant = time_constant;
txc->precision = 1;
txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
txc->tick = tick_usec;
txc->tai = *time_tai;
/* fill PPS status fields */
pps_fill_timex(txc);
txc->time.tv_sec = (time_t)ts->tv_sec;
/* tv_usec carries nanoseconds when STA_NANO is set, else microseconds. */
txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
txc->time.tv_usec /= NSEC_PER_USEC;
return result;
}
#ifdef CONFIG_NTP_PPS
/*
 * struct pps_normtime has the same layout as the good old struct timespec,
 * but it is semantically different (which is why it was invented):
 * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
 * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC).
 */
struct pps_normtime {
__kernel_time_t sec; /* seconds */
long nsec; /* nanoseconds */
};
/*
 * Convert @ts so that the nanosecond part lies in the
 * ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval.
 */
static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
{
	struct pps_normtime norm;

	norm.sec = ts.tv_sec;
	norm.nsec = ts.tv_nsec;

	/* More than half a second: express it as negative time before the next second. */
	if (norm.nsec > (NSEC_PER_SEC >> 1)) {
		norm.sec++;
		norm.nsec -= NSEC_PER_SEC;
	}

	return norm;
}
/*
 * Get the current phase correction and jitter.
 * Stores the absolute difference of the two newest samples in @jitter
 * and returns the newest sample.
 */
static inline long pps_phase_filter_get(long *jitter)
{
	*jitter = pps_tf[0] - pps_tf[1];
	*jitter = (*jitter < 0) ? -*jitter : *jitter;

	/* TODO: test various filters */
	return pps_tf[0];
}
/*
 * Add the sample @err to the phase filter: the two newest samples in
 * pps_tf[] each move down one slot and @err becomes pps_tf[0].
 */
static inline void pps_phase_filter_add(long err)
{
pps_tf[2] = pps_tf[1];
pps_tf[1] = pps_tf[0];
pps_tf[0] = err;
}
/*
 * Decrease the frequency calibration interval length.
 * It is halved after four consecutive unstable intervals.
 */
static inline void pps_dec_freq_interval(void)
{
	pps_intcnt--;
	if (pps_intcnt > -PPS_INTCOUNT)
		return;

	if (pps_shift > PPS_INTMIN) {
		/* Shorten the interval and restart the count. */
		pps_shift--;
		pps_intcnt = 0;
	} else {
		/* Already at the minimum: hold the counter at its floor. */
		pps_intcnt = -PPS_INTCOUNT;
	}
}
/*
 * Increase the frequency calibration interval length.
 * It is doubled after four consecutive stable intervals.
 */
static inline void pps_inc_freq_interval(void)
{
	pps_intcnt++;
	if (pps_intcnt < PPS_INTCOUNT)
		return;

	if (pps_shift < PPS_INTMAX) {
		/* Lengthen the interval and restart the count. */
		pps_shift++;
		pps_intcnt = 0;
	} else {
		/* Already at the maximum: hold the counter at its ceiling. */
		pps_intcnt = PPS_INTCOUNT;
	}
}
/*
 * update clock frequency based on MONOTONIC_RAW clock PPS signal
 * timestamps
 *
 * @freq_norm: normalized length of the calibration interval that just ended
 *
 * At the end of the calibration interval the difference between the
 * first and last MONOTONIC_RAW clock timestamps divided by the length
 * of the interval becomes the frequency update. If the interval was
 * too long, the data are discarded.
 * Returns the difference between old and new frequency values
 * (0 when the interval is discarded).
 */
static long hardpps_update_freq(struct pps_normtime freq_norm)
{
long delta, delta_mod;
s64 ftemp;
/* check if the frequency interval was too long */
if (freq_norm.sec > (2 << pps_shift)) {
time_status |= STA_PPSERROR;
pps_errcnt++;
pps_dec_freq_interval();
printk_deferred(KERN_ERR
"hardpps: PPSERROR: interval too long - %ld s\n",
freq_norm.sec);
return 0;
}
/* here the raw frequency offset and wander (stability) is
 * calculated. If the wander is less than the wander threshold
 * the interval is increased; otherwise it is decreased.
 */
ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
freq_norm.sec);
delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
pps_freq = ftemp;
if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
printk_deferred(KERN_WARNING
"hardpps: PPSWANDER: change=%ld\n", delta);
time_status |= STA_PPSWANDER;
pps_stbcnt++;
pps_dec_freq_interval();
} else { /* good sample */
pps_inc_freq_interval();
}
/* the stability metric is calculated as the average of recent
 * frequency changes, but is used only for performance
 * monitoring
 */
delta_mod = delta;
if (delta_mod < 0)
delta_mod = -delta_mod;
pps_stabil += (div_s64(((s64)delta_mod) <<
(NTP_SCALE_SHIFT - SHIFT_USEC),
NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
/* if enabled, the system clock frequency is updated */
if ((time_status & STA_PPSFREQ) != 0 &&
(time_status & STA_FREQHOLD) == 0) {
time_freq = pps_freq;
ntp_update_frequency();
}
return delta;
}
/*
 * correct REALTIME clock phase error against PPS signal
 *
 * @error: measured phase error; its negation is fed into the phase filter
 */
static void hardpps_update_phase(long error)
{
long correction = -error;
long jitter;
/* add the sample to the median filter */
pps_phase_filter_add(correction);
correction = pps_phase_filter_get(&jitter);
/* Nominal jitter is due to PPS signal noise. If it exceeds the
 * threshold, the sample is discarded; otherwise, if so enabled,
 * the time offset is updated.
 */
if (jitter > (pps_jitter << PPS_POPCORN)) {
printk_deferred(KERN_WARNING
"hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
jitter, (pps_jitter << PPS_POPCORN));
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
/* correct the time using the phase offset */
time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
NTP_INTERVAL_FREQ);
/* cancel running adjtime() */
time_adjust = 0;
}
/* update jitter: move pps_jitter a fraction (>> PPS_INTMIN) toward the new sample */
pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
}
/*
 * __hardpps() - discipline CPU clock oscillator to external PPS signal
 * @phase_ts: REALTIME clock timestamp of the pulse
 * @raw_ts:   MONOTONIC_RAW clock timestamp of the pulse
 *
 * This routine is called at each PPS signal arrival in order to
 * discipline the CPU clock oscillator to the PPS signal. It takes two
 * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
 * is used to correct clock phase error and the latter is used to
 * correct the frequency.
 *
 * This code is based on David Mills's reference nanokernel
 * implementation. It was mostly rewritten but keeps the same idea.
 */
void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
{
struct pps_normtime pts_norm, freq_norm;
pts_norm = pps_normalize_ts(*phase_ts);
/* clear the error bits, they will be set again if needed */
time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
/* indicate signal presence */
time_status |= STA_PPSSIGNAL;
pps_valid = PPS_VALID;
/* when called for the first time,
 * just start the frequency interval */
if (unlikely(pps_fbase.tv_sec == 0)) {
pps_fbase = *raw_ts;
return;
}
/* ok, now we have a base for frequency calculation */
freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
/* check that the signal is in the range
 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
if ((freq_norm.sec == 0) ||
(freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
(freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
return;
}
/* signal is ok */
/* check if the current frequency interval is finished */
if (freq_norm.sec >= (1 << pps_shift)) {
pps_calcnt++;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
hardpps_update_freq(freq_norm);
}
/* the phase is corrected on every accepted pulse */
hardpps_update_phase(pts_norm.nsec);
}
#endif /* CONFIG_NTP_PPS */
/*
 * Parse the "ntp_tick_adj=" boot parameter.
 * @str: parameter value as given on the command line
 *
 * The value is parsed with kstrtos64() so that the whole 64-bit variable
 * is written. Parsing through a (long *) cast stores only a long, which
 * is narrower than ntp_tick_adj on 32-bit builds. The parsed value is
 * then scaled up by NTP_SCALE_SHIFT.
 *
 * Returns 1 on success, or the kstrtos64() error code.
 */
static int __init ntp_tick_adj_setup(char *str)
{
	int rc = kstrtos64(str, 0, &ntp_tick_adj);

	if (rc)
		return rc;

	ntp_tick_adj <<= NTP_SCALE_SHIFT;

	return 1;
}
__setup("ntp_tick_adj=", ntp_tick_adj_setup);
/* Boot-time initialization: put the NTP state into its cleared state. */
void __init ntp_init(void)
{
ntp_clear();
}

View file

@ -0,0 +1,12 @@
#ifndef _LINUX_NTP_INTERNAL_H
#define _LINUX_NTP_INTERNAL_H
extern void ntp_init(void);
extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);
extern int second_overflow(unsigned long secs);
extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
extern void __hardpps(const struct timespec *, const struct timespec *);
#endif /* _LINUX_NTP_INTERNAL_H */

446
kernel/time/posix-clock.c Normal file
View file

@ -0,0 +1,446 @@
/*
* posix-clock.c - support for dynamic clock devices
*
* Copyright (C) 2010 OMICRON electronics GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/device.h>
#include <linux/export.h>
#include <linux/file.h>
#include <linux/posix-clock.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
static void delete_clock(struct kref *kref);
/*
 * Take the clock's rwsem for reading and return the posix_clock attached
 * to 'fp'. Returns NULL, with the rwsem released again, if the instance
 * is old and stale (zombie). On success the caller must balance this
 * with put_posix_clock().
 */
static struct posix_clock *get_posix_clock(struct file *fp)
{
	struct posix_clock *clk = fp->private_data;

	down_read(&clk->rwsem);

	if (clk->zombie) {
		up_read(&clk->rwsem);
		return NULL;
	}

	return clk;
}
/* Release the read lock on @clk taken by get_posix_clock(). */
static void put_posix_clock(struct posix_clock *clk)
{
up_read(&clk->rwsem);
}
/*
 * read() file operation: forward to the clock driver's read op.
 * Returns -ENODEV for a stale clock and -EINVAL when the driver has no
 * read op.
 */
static ssize_t posix_clock_read(struct file *fp, char __user *buf,
				size_t count, loff_t *ppos)
{
	struct posix_clock *clk;
	int err;

	clk = get_posix_clock(fp);
	if (!clk)
		return -ENODEV;

	err = clk->ops.read ?
		clk->ops.read(clk, fp->f_flags, buf, count) : -EINVAL;

	put_posix_clock(clk);

	return err;
}
/*
 * poll() file operation: forward to the clock driver's poll op.
 *
 * The return value is an unsigned event mask, not an errno. A negative
 * error code would be read by the caller as a set of bogus event bits,
 * so a stale clock is reported as POLLERR. Returns 0 (no events) when
 * the driver has no poll op.
 */
static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
{
	struct posix_clock *clk = get_posix_clock(fp);
	unsigned int result = 0;

	if (!clk)
		return POLLERR;

	if (clk->ops.poll)
		result = clk->ops.poll(clk, fp, wait);

	put_posix_clock(clk);

	return result;
}
/*
 * fasync() file operation: forward to the clock driver's fasync op.
 * Returns -ENODEV for a stale clock and 0 when the driver has no fasync op.
 */
static int posix_clock_fasync(int fd, struct file *fp, int on)
{
	struct posix_clock *clk;
	int err;

	clk = get_posix_clock(fp);
	if (!clk)
		return -ENODEV;

	err = clk->ops.fasync ? clk->ops.fasync(clk, fd, fp, on) : 0;

	put_posix_clock(clk);

	return err;
}
/*
 * mmap() file operation: forward to the clock driver's mmap op.
 * Returns -ENODEV for a stale clock or when the driver has no mmap op.
 */
static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct posix_clock *clk;
	int err;

	clk = get_posix_clock(fp);
	if (!clk)
		return -ENODEV;

	err = clk->ops.mmap ? clk->ops.mmap(clk, vma) : -ENODEV;

	put_posix_clock(clk);

	return err;
}
/*
 * ioctl() file operation: forward to the clock driver's ioctl op.
 * Returns -ENODEV for a stale clock and -ENOTTY when the driver has no
 * ioctl op.
 */
static long posix_clock_ioctl(struct file *fp,
			      unsigned int cmd, unsigned long arg)
{
	struct posix_clock *clk;
	int err;

	clk = get_posix_clock(fp);
	if (!clk)
		return -ENODEV;

	err = clk->ops.ioctl ? clk->ops.ioctl(clk, cmd, arg) : -ENOTTY;

	put_posix_clock(clk);

	return err;
}
#ifdef CONFIG_COMPAT
/*
 * compat_ioctl() file operation: handled exactly like the native ioctl
 * path, by forwarding to the clock driver's ioctl op.
 * Returns -ENODEV for a stale clock and -ENOTTY when the driver has no
 * ioctl op.
 */
static long posix_clock_compat_ioctl(struct file *fp,
				     unsigned int cmd, unsigned long arg)
{
	struct posix_clock *clk;
	int err;

	clk = get_posix_clock(fp);
	if (!clk)
		return -ENODEV;

	err = clk->ops.ioctl ? clk->ops.ioctl(clk, cmd, arg) : -ENOTTY;

	put_posix_clock(clk);

	return err;
}
#endif
/*
 * open() file operation. Looks up the posix_clock embedding the inode's
 * cdev, calls the driver's open op if it has one and, on success, takes
 * a reference on the clock and attaches it to the file.
 * Returns -ENODEV if the clock is already a zombie.
 */
static int posix_clock_open(struct inode *inode, struct file *fp)
{
	struct posix_clock *clk =
		container_of(inode->i_cdev, struct posix_clock, cdev);
	int err;

	down_read(&clk->rwsem);

	if (clk->zombie) {
		err = -ENODEV;
	} else {
		err = clk->ops.open ? clk->ops.open(clk, fp->f_mode) : 0;
		if (!err) {
			/* Reference is dropped in posix_clock_release(). */
			kref_get(&clk->kref);
			fp->private_data = clk;
		}
	}

	up_read(&clk->rwsem);

	return err;
}
/*
 * release() file operation: call the driver's release op if present,
 * drop the file's reference on the clock and detach it from the file.
 * Returns the driver's result, or 0 when it has no release op.
 */
static int posix_clock_release(struct inode *inode, struct file *fp)
{
	struct posix_clock *clk = fp->private_data;
	int err;

	err = clk->ops.release ? clk->ops.release(clk) : 0;

	kref_put(&clk->kref, delete_clock);
	fp->private_data = NULL;

	return err;
}
/*
 * File operations installed on every posix clock character device.
 * Seeking is not supported (no_llseek).
 */
static const struct file_operations posix_clock_file_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read = posix_clock_read,
.poll = posix_clock_poll,
.unlocked_ioctl = posix_clock_ioctl,
.open = posix_clock_open,
.release = posix_clock_release,
.fasync = posix_clock_fasync,
.mmap = posix_clock_mmap,
#ifdef CONFIG_COMPAT
.compat_ioctl = posix_clock_compat_ioctl,
#endif
};
/*
 * posix_clock_register - register a new dynamic clock
 * @clk:   clock to register; its kref, rwsem and cdev are initialized here
 * @devid: device number for the clock's character device
 *
 * Returns the result of cdev_add().
 */
int posix_clock_register(struct posix_clock *clk, dev_t devid)
{
	kref_init(&clk->kref);
	init_rwsem(&clk->rwsem);

	cdev_init(&clk->cdev, &posix_clock_file_operations);
	clk->cdev.owner = clk->ops.owner;

	return cdev_add(&clk->cdev, devid, 1);
}
EXPORT_SYMBOL_GPL(posix_clock_register);
/* kref release callback: invoke the clock's own release hook, if any. */
static void delete_clock(struct kref *kref)
{
	struct posix_clock *clk = container_of(kref, struct posix_clock, kref);

	if (!clk->release)
		return;

	clk->release(clk);
}
/*
 * posix_clock_unregister - remove a dynamic clock
 * @clk: clock previously set up with posix_clock_register()
 *
 * Removes the character device, marks the clock as a zombie under the
 * write lock so that get_posix_clock() and posix_clock_open() refuse it
 * from then on, and drops a reference on the clock.
 */
void posix_clock_unregister(struct posix_clock *clk)
{
cdev_del(&clk->cdev);
down_write(&clk->rwsem);
clk->zombie = true;
up_write(&clk->rwsem);
kref_put(&clk->kref, delete_clock);
}
EXPORT_SYMBOL_GPL(posix_clock_unregister);
/* A clock looked up from a clockid: the open file and the clock behind it. */
struct posix_clock_desc {
struct file *fp; /* file reference obtained with fget() */
struct posix_clock *clk; /* clock returned by get_posix_clock() */
};
/*
 * Resolve the dynamic clock id @id into @cd.
 *
 * Returns 0 on success, in which case the caller must call
 * put_clock_desc(). Returns -EINVAL if the id does not refer to an open
 * posix clock file and -ENODEV if the clock is stale. No file reference
 * is kept on failure.
 */
static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
{
	struct file *fp = fget(CLOCKID_TO_FD(id));

	if (!fp)
		return -EINVAL;

	/* Only files opened through posix_clock_open() qualify. */
	if (fp->f_op->open != posix_clock_open || !fp->private_data) {
		fput(fp);
		return -EINVAL;
	}

	cd->fp = fp;
	cd->clk = get_posix_clock(fp);
	if (!cd->clk) {
		fput(fp);
		return -ENODEV;
	}

	return 0;
}
/* Undo get_clock_desc(): release the clock, then the file reference. */
static void put_clock_desc(struct posix_clock_desc *cd)
{
put_posix_clock(cd->clk);
fput(cd->fp);
}
/*
 * clock_adjtime() for dynamic clocks.
 * Returns -EACCES if the file was not opened for writing and
 * -EOPNOTSUPP if the driver has no clock_adjtime op.
 */
static int pc_clock_adjtime(clockid_t id, struct timex *tx)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(id, &cd);

	if (err)
		return err;

	if (!(cd.fp->f_mode & FMODE_WRITE))
		err = -EACCES;
	else if (!cd.clk->ops.clock_adjtime)
		err = -EOPNOTSUPP;
	else
		err = cd.clk->ops.clock_adjtime(cd.clk, tx);

	put_clock_desc(&cd);

	return err;
}
/*
 * clock_gettime() for dynamic clocks.
 * Returns -EOPNOTSUPP if the driver has no clock_gettime op.
 */
static int pc_clock_gettime(clockid_t id, struct timespec *ts)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(id, &cd);

	if (err)
		return err;

	err = cd.clk->ops.clock_gettime ?
		cd.clk->ops.clock_gettime(cd.clk, ts) : -EOPNOTSUPP;

	put_clock_desc(&cd);

	return err;
}
/*
 * clock_getres() for dynamic clocks.
 * Returns -EOPNOTSUPP if the driver has no clock_getres op.
 */
static int pc_clock_getres(clockid_t id, struct timespec *ts)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(id, &cd);

	if (err)
		return err;

	err = cd.clk->ops.clock_getres ?
		cd.clk->ops.clock_getres(cd.clk, ts) : -EOPNOTSUPP;

	put_clock_desc(&cd);

	return err;
}
/*
 * clock_settime() for dynamic clocks.
 * Returns -EACCES if the file was not opened for writing and
 * -EOPNOTSUPP if the driver has no clock_settime op.
 */
static int pc_clock_settime(clockid_t id, const struct timespec *ts)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(id, &cd);

	if (err)
		return err;

	if (!(cd.fp->f_mode & FMODE_WRITE))
		err = -EACCES;
	else if (!cd.clk->ops.clock_settime)
		err = -EOPNOTSUPP;
	else
		err = cd.clk->ops.clock_settime(cd.clk, ts);

	put_clock_desc(&cd);

	return err;
}
/*
 * timer_create() for dynamic clocks; the clock id comes from
 * kit->it_clock.
 * Returns -EOPNOTSUPP if the driver has no timer_create op.
 */
static int pc_timer_create(struct k_itimer *kit)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(kit->it_clock, &cd);

	if (err)
		return err;

	err = cd.clk->ops.timer_create ?
		cd.clk->ops.timer_create(cd.clk, kit) : -EOPNOTSUPP;

	put_clock_desc(&cd);

	return err;
}
/*
 * timer_delete() for dynamic clocks; the clock id comes from
 * kit->it_clock.
 * Returns -EOPNOTSUPP if the driver has no timer_delete op.
 */
static int pc_timer_delete(struct k_itimer *kit)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(kit->it_clock, &cd);

	if (err)
		return err;

	err = cd.clk->ops.timer_delete ?
		cd.clk->ops.timer_delete(cd.clk, kit) : -EOPNOTSUPP;

	put_clock_desc(&cd);

	return err;
}
/*
 * timer_gettime() for dynamic clocks. Silently does nothing when the
 * clock cannot be resolved or the driver has no timer_gettime op.
 */
static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
{
	struct posix_clock_desc cd;

	if (get_clock_desc(kit->it_clock, &cd) != 0)
		return;

	if (cd.clk->ops.timer_gettime)
		cd.clk->ops.timer_gettime(cd.clk, kit, ts);

	put_clock_desc(&cd);
}
/*
 * timer_settime() for dynamic clocks; the clock id comes from
 * kit->it_clock.
 * Returns -EOPNOTSUPP if the driver has no timer_settime op.
 */
static int pc_timer_settime(struct k_itimer *kit, int flags,
			    struct itimerspec *ts, struct itimerspec *old)
{
	struct posix_clock_desc cd;
	int err = get_clock_desc(kit->it_clock, &cd);

	if (err)
		return err;

	err = cd.clk->ops.timer_settime ?
		cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old) :
		-EOPNOTSUPP;

	put_clock_desc(&cd);

	return err;
}
/* k_clock operation table routing to the pc_* wrappers in this file. */
struct k_clock clock_posix_dynamic = {
.clock_getres = pc_clock_getres,
.clock_set = pc_clock_settime,
.clock_get = pc_clock_gettime,
.clock_adj = pc_clock_adjtime,
.timer_create = pc_timer_create,
.timer_set = pc_timer_settime,
.timer_del = pc_timer_delete,
.timer_get = pc_timer_gettime,
};

File diff suppressed because it is too large Load diff

1124
kernel/time/posix-timers.c Normal file

File diff suppressed because it is too large Load diff

217
kernel/time/sched_clock.c Normal file
View file

@ -0,0 +1,217 @@
/*
* sched_clock.c: support for extending counters to full 64-bit ns counter
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/syscore_ops.h>
#include <linux/hrtimer.h>
#include <linux/sched_clock.h>
#include <linux/seqlock.h>
#include <linux/bitops.h>
/* State needed to turn raw counter reads into a 64-bit ns sched_clock(). */
struct clock_data {
ktime_t wrap_kt; /* period of the hrtimer that refreshes the epoch */
u64 epoch_ns; /* nanoseconds accumulated at the last epoch update */
u64 epoch_cyc; /* counter value captured at the last epoch update */
seqcount_t seq; /* guards readers against concurrent epoch updates */
unsigned long rate; /* rate passed to sched_clock_register() */
u32 mult; /* cycles-to-ns multiplier, see cyc_to_ns() */
u32 shift; /* cycles-to-ns right shift, see cyc_to_ns() */
bool suspended; /* true between sched_clock_suspend() and _resume() */
};
/* Timer that periodically calls sched_clock_poll(). */
static struct hrtimer sched_clock_timer;
/* irqtime parameter: -1 selects by clock rate, > 0 forces IRQ time accounting on. */
static int irqtime = -1;
core_param(irqtime, irqtime, int, 0400);
/* Initial state, used with the jiffies-based reader until a clock is registered. */
static struct clock_data cd = {
.mult = NSEC_PER_SEC / HZ,
};
/* Mask applied to counter deltas; set in sched_clock_register(). */
static u64 __read_mostly sched_clock_mask;
/* Default counter read function: jiffies elapsed since INITIAL_JIFFIES. */
static u64 notrace jiffy_sched_clock_read(void)
{
/*
 * We don't need to use get_jiffies_64 on 32-bit arches here
 * because we register with BITS_PER_LONG
 */
return (u64)(jiffies - INITIAL_JIFFIES);
}
/* Active counter read function; replaced by sched_clock_register(). */
static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
/* Convert @cyc counter cycles to nanoseconds: (cyc * mult) >> shift. */
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
return (cyc * mult) >> shift;
}
/*
 * Return the current sched_clock value in nanoseconds.
 *
 * While suspended, the value saved in cd.epoch_ns is returned. Otherwise
 * the result is the epoch's nanoseconds plus the masked counter delta
 * since the epoch, converted with cyc_to_ns().
 */
unsigned long long notrace sched_clock(void)
{
u64 epoch_ns;
u64 epoch_cyc;
u64 cyc;
unsigned long seq;
if (cd.suspended)
return cd.epoch_ns;
/* Retry until both epoch values come from the same update. */
do {
seq = raw_read_seqcount_begin(&cd.seq);
epoch_cyc = cd.epoch_cyc;
epoch_ns = cd.epoch_ns;
} while (read_seqcount_retry(&cd.seq, seq));
cyc = read_sched_clock();
cyc = (cyc - epoch_cyc) & sched_clock_mask;
return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
}
/*
 * Atomically update the sched_clock epoch.
 *
 * Reads the counter, computes the nanoseconds accumulated up to that
 * read, and publishes both as the new epoch inside a seqcount write
 * section with local interrupts disabled.
 */
static void notrace update_sched_clock(void)
{
unsigned long flags;
u64 cyc;
u64 ns;
cyc = read_sched_clock();
ns = cd.epoch_ns +
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
raw_local_irq_save(flags);
raw_write_seqcount_begin(&cd.seq);
cd.epoch_ns = ns;
cd.epoch_cyc = cyc;
raw_write_seqcount_end(&cd.seq);
raw_local_irq_restore(flags);
}
/*
 * hrtimer callback: refresh the epoch, push the expiry forward by
 * cd.wrap_kt and keep the timer running.
 */
static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{
update_sched_clock();
hrtimer_forward_now(hrt, cd.wrap_kt);
return HRTIMER_RESTART;
}
/*
 * sched_clock_register - install a counter as the sched_clock source
 * @read: function returning the current counter value
 * @bits: number of valid bits in the counter
 * @rate: counter frequency in Hz
 *
 * A clock slower than the one already registered is ignored. Otherwise
 * the conversion factors, mask and wrap period are recomputed and the
 * new reader is installed, carrying over the nanoseconds accumulated
 * with the previous reader.
 */
void __init sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
ktime_t new_wrap_kt;
unsigned long r;
char r_unit;
/* Keep the current clock if it has the higher rate. */
if (cd.rate > rate)
return;
WARN_ON(!irqs_disabled());
/* calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
new_mask = CLOCKSOURCE_MASK(bits);
/* calculate how many ns until we wrap */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
/* Refresh period is the wrap time less one eighth, leaving a margin. */
new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
/* update epoch for new counter and update epoch_ns from old counter*/
new_epoch = read();
cyc = read_sched_clock();
ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
/* Switch reader, mask and conversion data inside one write section. */
raw_write_seqcount_begin(&cd.seq);
read_sched_clock = read;
sched_clock_mask = new_mask;
cd.rate = rate;
cd.wrap_kt = new_wrap_kt;
cd.mult = new_mult;
cd.shift = new_shift;
cd.epoch_cyc = new_epoch;
cd.epoch_ns = ns;
raw_write_seqcount_end(&cd.seq);
/* Pick a unit (MHz, kHz or Hz) for the log message below. */
r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
} else if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else
r_unit = ' ';
/* calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
bits, r, r_unit, res, wrap);
/* Enable IRQ time accounting if we have a fast enough sched_clock */
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
pr_debug("Registered %pF as sched_clock source\n", read);
}
/*
 * Finish sched_clock setup: fall back to the jiffies-based clock if
 * nothing was registered, set the initial epoch and start the periodic
 * refresh timer.
 */
void __init sched_clock_postinit(void)
{
/*
 * If no sched_clock function has been provided at that point,
 * make the jiffies-based one the final one.
 */
if (read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
update_sched_clock();
/*
 * Start the timer to keep sched_clock() properly updated and
 * set the initial epoch.
 */
hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sched_clock_timer.function = sched_clock_poll;
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
}
/*
 * syscore suspend hook: refresh the epoch one last time, stop the
 * refresh timer and mark the clock as suspended so that sched_clock()
 * returns the saved epoch value. Always returns 0.
 */
static int sched_clock_suspend(void)
{
update_sched_clock();
hrtimer_cancel(&sched_clock_timer);
cd.suspended = true;
return 0;
}
/*
 * syscore resume hook: restart counting from the current counter value
 * (epoch_ns is left as saved at suspend), restart the refresh timer and
 * clear the suspended flag.
 */
static void sched_clock_resume(void)
{
cd.epoch_cyc = read_sched_clock();
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
cd.suspended = false;
}
/* Suspend/resume hooks registered by sched_clock_syscore_init(). */
static struct syscore_ops sched_clock_ops = {
.suspend = sched_clock_suspend,
.resume = sched_clock_resume,
};
/* Initcall that registers sched_clock_ops with syscore. Always returns 0. */
static int __init sched_clock_syscore_init(void)
{
register_syscore_ops(&sched_clock_ops);
return 0;
}
device_initcall(sched_clock_syscore_init);

View file

@ -0,0 +1,113 @@
/*
* linux/kernel/time/tick-broadcast-hrtimer.c
* This file emulates a local clock event device
* via a pseudo clock device.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/clockchips.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>
#include "tick-internal.h"
static struct hrtimer bctimer;
/*
 * set_mode callback of the hrtimer based broadcast device.
 * Only CLOCK_EVT_MODE_SHUTDOWN requires any action.
 */
static void bc_set_mode(enum clock_event_mode mode,
			struct clock_event_device *bc)
{
	if (mode != CLOCK_EVT_MODE_SHUTDOWN)
		return;

	/*
	 * Note, we cannot cancel the timer here as we might
	 * run into the following live lock scenario:
	 *
	 * cpu 0		cpu1
	 * lock(broadcast_lock);
	 *			hrtimer_interrupt()
	 *			bc_handler()
	 *			   tick_handle_oneshot_broadcast();
	 *			    lock(broadcast_lock);
	 * hrtimer_cancel()
	 *  wait_for_callback()
	 *
	 * So only try to cancel, without waiting for a running callback.
	 */
	hrtimer_try_to_cancel(&bctimer);
}
/*
 * This is called from the guts of the broadcast code when the cpu
 * which is about to enter idle has the earliest broadcast timer event.
 *
 * @expires: absolute expiry time for the broadcast timer
 * @bc:      the broadcast clock event device
 *
 * Always returns 0.
 */
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
int bc_moved;
/*
 * We try to cancel the timer first. If the callback is in
 * flight on some other cpu then we let it handle it. If we
 * were able to cancel the timer nothing can rearm it as we
 * own broadcast_lock.
 *
 * However we can also be called from the event handler of
 * ce_broadcast_hrtimer itself when it expires. We cannot
 * restart the timer because we are in the callback, but we
 * can set the expiry time and let the callback return
 * HRTIMER_RESTART.
 *
 * Since we are in the idle loop at this point and because
 * hrtimer_{start/cancel} functions call into tracing,
 * calls to these functions must be bound within RCU_NONIDLE.
 */
RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ?
!hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) :
0);
if (bc_moved) {
/* Bind the "device" to the cpu */
bc->bound_on = smp_processor_id();
} else if (bc->bound_on == smp_processor_id()) {
/* Timer not restarted here: just record the new expiry on the bound cpu. */
hrtimer_set_expires(&bctimer, expires);
}
return 0;
}
/*
 * The pseudo clock event device. It is oneshot only, is programmed in
 * ktime units (set_next_ktime) and carries CLOCK_EVT_FEAT_HRTIMER so
 * the broadcast code can tell that it is hrtimer based. bound_on starts
 * at -1, i.e. not bound to any cpu, and rating is 0.
 */
static struct clock_event_device ce_broadcast_hrtimer = {
.set_mode = bc_set_mode,
.set_next_ktime = bc_set_next,
.features = CLOCK_EVT_FEAT_ONESHOT |
CLOCK_EVT_FEAT_KTIME |
CLOCK_EVT_FEAT_HRTIMER,
.rating = 0,
.bound_on = -1,
.min_delta_ns = 1,
.max_delta_ns = KTIME_MAX,
.min_delta_ticks = 1,
.max_delta_ticks = ULONG_MAX,
.mult = 1,
.shift = 0,
.cpumask = cpu_all_mask,
};
static enum hrtimer_restart bc_handler(struct hrtimer *t)
{
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
return HRTIMER_NORESTART;
return HRTIMER_RESTART;
}
/*
 * Initialize the backing hrtimer (CLOCK_MONOTONIC, absolute mode), hook
 * up its callback and register the pseudo device with the clockevents
 * layer.
 */
void tick_setup_hrtimer_broadcast(void)
{
hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}

View file

@ -0,0 +1,951 @@
/*
* linux/kernel/time/tick-broadcast.c
*
* This file contains functions which emulate a local clock-event
* device via a broadcast event source.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>
#include "tick-internal.h"
/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */
/* The broadcast tick device (event device plus operating mode) */
static struct tick_device tick_broadcast_device;
/* CPUs which are currently served by the broadcast device */
static cpumask_var_t tick_broadcast_mask;
/* CPUs which asked for broadcast via BROADCAST_ON/FORCE */
static cpumask_var_t tick_broadcast_on;
/* Scratch mask used by the broadcast handlers and oneshot setup */
static cpumask_var_t tmpmask;
/* Serializes access to the broadcast device and the masks above */
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
/* Set by BROADCAST_FORCE; while set, BROADCAST_OFF requests are ignored */
static int tick_broadcast_force;
#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
/* Without oneshot support there is no oneshot state to clear */
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif
/*
 * Debugging: see timer_list.c
 *
 * Returns a pointer to the (static) broadcast tick device.
 */
struct tick_device *tick_get_broadcast_device(void)
{
return &tick_broadcast_device;
}
/* Returns the mask of CPUs which are served by the broadcast device */
struct cpumask *tick_get_broadcast_mask(void)
{
return tick_broadcast_mask;
}
/*
 * Start the device in periodic mode. A NULL device is tolerated and
 * simply ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
/*
* Check, if the device can be utilized as broadcast device:
*/
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
struct clock_event_device *newdev)
{
if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
(newdev->features & CLOCK_EVT_FEAT_C3STOP))
return false;
if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
return false;
return !curdev || newdev->rating > curdev->rating;
}
/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
	struct clock_event_device *old = tick_broadcast_device.evtdev;

	/*
	 * Bail out if the device is not suitable or if no reference on
	 * its owner module can be taken.
	 */
	if (!tick_check_broadcast_device(old, dev) ||
	    !try_module_get(dev->owner))
		return;

	clockevents_exchange_device(old, dev);
	/* Neutralize the handler of the device which got replaced */
	if (old)
		old->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;

	/* Some cpus already depend on broadcast: start ticking right away */
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_clock_notify();
}
/*
 * Check, if the device is the broadcast device. A NULL device never
 * matches.
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	if (!dev)
		return 0;

	return tick_broadcast_device.evtdev == dev;
}
/*
 * Update the frequency of @dev under tick_broadcast_lock, provided that
 * @dev is the current broadcast device.
 *
 * Returns -ENODEV if @dev is not the broadcast device, otherwise the
 * result of __clockevents_update_freq().
 */
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int err;

	if (!tick_is_broadcast_device(dev))
		return -ENODEV;

	raw_spin_lock(&tick_broadcast_lock);
	err = __clockevents_update_freq(dev, freq);
	raw_spin_unlock(&tick_broadcast_lock);

	return err;
}
/*
 * Fallback broadcast function, installed when a device has no real one.
 * It cannot deliver anything; it only complains (once) and ignores @mask.
 */
static void err_broadcast(const struct cpumask *mask)
{
pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
/*
 * Make sure @dev has a broadcast function: keep the one the device
 * brought along, else try tick_broadcast, and as a last resort install
 * the error stub after warning once.
 */
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;

	if (dev->broadcast)
		return;

	/* tick_broadcast did not provide a function either */
	pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
		     dev->name);
	dev->broadcast = err_broadcast;
}
/*
 * Check, if the device is dysfunctional and a place holder, which
 * needs to be handled by the broadcast device.
 *
 * Returns non-zero when the tick for @cpu is delivered by the broadcast
 * device, so the caller must not set up @dev itself. Returns 0 when the
 * caller has to initialize @dev.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Devices might be registered with both periodic and oneshot
* mode disabled. This signals, that the device needs to be
* operated from the broadcast device and is a placeholder for
* the cpu local device.
*/
if (!tick_device_is_functional(dev)) {
dev->event_handler = tick_handle_periodic;
tick_device_setup_broadcast_func(dev);
cpumask_set_cpu(cpu, tick_broadcast_mask);
/* Get the broadcast device going in its current mode */
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
tick_broadcast_setup_oneshot(bc);
ret = 1;
} else {
/*
* Clear the broadcast bit for this cpu if the
* device is not power state affected.
*/
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
cpumask_clear_cpu(cpu, tick_broadcast_mask);
else
tick_device_setup_broadcast_func(dev);
/*
* Clear the broadcast bit if the CPU is not in
* periodic broadcast on state.
*/
if (!cpumask_test_cpu(cpu, tick_broadcast_on))
cpumask_clear_cpu(cpu, tick_broadcast_mask);
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_ONESHOT:
/*
* If the system is in oneshot mode we can
* unconditionally clear the oneshot mask bit,
* because the CPU is running and therefore
* not in an idle state which causes the power
* state affected device to stop. Let the
* caller initialize the device.
*/
tick_broadcast_clear_oneshot(cpu);
ret = 0;
break;
case TICKDEV_MODE_PERIODIC:
/*
* If the system is in periodic mode, check
* whether the broadcast device can be
* switched off now.
*/
if (cpumask_empty(tick_broadcast_mask) && bc)
clockevents_shutdown(bc);
/*
* If we kept the cpu in the broadcast mask,
* tell the caller to leave the per cpu device
* in shutdown state. The periodic interrupt
* is delivered by the broadcast device.
*/
ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
break;
default:
/* Nothing to do */
ret = 0;
break;
}
}
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/*
 * Run the event handler of this cpu's tick device on behalf of a
 * received broadcast.
 *
 * Returns 0 on success, -ENODEV if the cpu has no tick device and
 * -EINVAL if the device has no event handler installed.
 */
int tick_receive_broadcast(void)
{
	struct clock_event_device *evt;

	evt = this_cpu_ptr(&tick_cpu_device)->evtdev;
	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif
/*
* Broadcast the event to the cpus, which are set in the mask (mangled).
*/
static void tick_do_broadcast(struct cpumask *mask)
{
int cpu = smp_processor_id();
struct tick_device *td;
/*
* Check, if the current cpu is in the mask
*/
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->event_handler(td->evtdev);
}
if (!cpumask_empty(mask)) {
/*
* It might be necessary to actually check whether the devices
* have different broadcast functions. For now, just use the
* one of the first device. This works as long as we have this
* misfeature only on x86 (lapic)
*/
td = &per_cpu(tick_cpu_device, cpumask_first(mask));
td->evtdev->broadcast(mask);
}
}
/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 *
 * Only online cpus which are in the broadcast mask are targeted. The
 * shared scratch mask tmpmask is used to build the target set.
 */
static void tick_do_periodic_broadcast(void)
{
cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
tick_do_broadcast(tmpmask);
}
/*
 * Event handler for periodic broadcast ticks
 *
 * Delivers the tick to all broadcast cpus and, for devices which do not
 * run in hardware periodic mode, programs the next period by hand.
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
ktime_t next;
raw_spin_lock(&tick_broadcast_lock);
tick_do_periodic_broadcast();
/*
* The device is in periodic mode. No reprogramming necessary:
*/
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
goto unlock;
/*
* Setup the next period for devices, which do not have
* periodic mode. We read dev->next_event first and add to it
* when the event already expired. clockevents_program_event()
* sets dev->next_event only when the event is really
* programmed to the device.
*/
for (next = dev->next_event; ;) {
next = ktime_add(next, tick_period);
/* Programming succeeded: the next tick is armed, we are done */
if (!clockevents_program_event(dev, next, false))
goto unlock;
/* The period was already over: deliver it and try the next one */
tick_do_periodic_broadcast();
}
unlock:
raw_spin_unlock(&tick_broadcast_lock);
}
/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 *
 * @reason points to one of CLOCK_EVT_NOTIFY_BROADCAST_ON, _FORCE or
 * _OFF. The request is applied to the cpu this function runs on.
 */
static void tick_do_broadcast_on_off(unsigned long *reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
int cpu, bc_stopped;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
bc = tick_broadcast_device.evtdev;
/*
* Is the device not affected by the powerstate ?
*/
if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
goto out;
if (!tick_device_is_functional(dev))
goto out;
/* Remember whether broadcast was idle before this request */
bc_stopped = cpumask_empty(tick_broadcast_mask);
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
cpumask_set_cpu(cpu, tick_broadcast_on);
/* Newly added to the mask: in periodic mode stop the local device */
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
}
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
tick_broadcast_force = 1;
break;
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
/* Once forced on, broadcast cannot be switched off anymore */
if (tick_broadcast_force)
break;
cpumask_clear_cpu(cpu, tick_broadcast_on);
if (!tick_device_is_functional(dev))
break;
/* Removed from the mask: in periodic mode restart the local device */
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
}
break;
}
/* Stop or start the broadcast device when the mask became empty/non-empty */
if (cpumask_empty(tick_broadcast_mask)) {
if (!bc_stopped)
clockevents_shutdown(bc);
} else if (bc_stopped) {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
tick_broadcast_setup_oneshot(bc);
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop.
 *
 * Requests for a cpu which is not online are rejected with an error
 * message.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
	if (cpumask_test_cpu(*oncpu, cpu_online_mask)) {
		tick_do_broadcast_on_off(&reason);
		return;
	}

	printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
	       "offline CPU #%d\n", *oncpu);
}
/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	dev->event_handler = broadcast ? tick_handle_periodic_broadcast :
					 tick_handle_periodic;
}
/*
 * Remove a CPU from broadcasting
 *
 * In periodic mode the broadcast device is shut down as soon as no cpu
 * depends on it anymore.
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
	unsigned int cpu = *cpup;
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC &&
	    bc && cpumask_empty(tick_broadcast_mask))
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Shut down the broadcast device, if there is one, under
 * tick_broadcast_lock.
 */
void tick_suspend_broadcast(void)
{
struct clock_event_device *bc;
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc)
clockevents_shutdown(bc);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Bring the broadcast device back after a suspend.
 *
 * Returns non-zero if the calling cpu is served by the (periodic)
 * broadcast device, 0 otherwise or if there is no broadcast device.
 */
int tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;
	int broadcast = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (!bc)
		goto unlock;

	clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (!cpumask_empty(tick_broadcast_mask))
			tick_broadcast_start_periodic(bc);
		broadcast = cpumask_test_cpu(smp_processor_id(),
					     tick_broadcast_mask);
	} else if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) {
		if (!cpumask_empty(tick_broadcast_mask))
			broadcast = tick_resume_broadcast_oneshot(bc);
	}

unlock:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return broadcast;
}
#ifdef CONFIG_TICK_ONESHOT
/* CPUs whose next event is currently handled by the oneshot broadcast */
static cpumask_var_t tick_broadcast_oneshot_mask;
/* CPUs with an expired event for which the broadcast is being delivered */
static cpumask_var_t tick_broadcast_pending_mask;
/* CPUs which must be included in the next broadcast unconditionally */
static cpumask_var_t tick_broadcast_force_mask;
/*
 * Exposed for debugging: see timer_list.c
 *
 * Returns the oneshot broadcast mask.
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
return tick_broadcast_oneshot_mask;
}
/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 *
 * Returns non-zero if the current cpu is set in the force mask.
 */
int tick_check_broadcast_expired(void)
{
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}
/*
 * Set broadcast interrupt affinity
 *
 * Only done for devices with CLOCK_EVT_FEAT_DYNIRQ, and only when the
 * requested mask differs from the one the device currently uses.
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ) ||
	    cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}
/*
 * Program the broadcast device to fire at @expires, switching it to
 * oneshot mode first if necessary. On success the broadcast interrupt
 * affinity is moved to @cpu.
 *
 * Returns the result of clockevents_program_event().
 */
static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				    ktime_t expires, int force)
{
	int err;

	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

	err = clockevents_program_event(bc, expires, force);
	if (err)
		return err;

	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
	return 0;
}
/*
 * Put the broadcast device back into oneshot mode on resume.
 * Always returns 0.
 */
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
return 0;
}
/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	struct tick_device *td;

	if (!cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask))
		return;

	td = this_cpu_ptr(&tick_cpu_device);

	/*
	 * We might be in the middle of switching over from
	 * periodic to oneshot. If the CPU has not yet
	 * switched over, leave the device alone.
	 */
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return;

	clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
}
/*
 * Handle oneshot mode broadcasting
 *
 * Event handler of the broadcast device in oneshot mode: collect all
 * cpus whose local event has expired, wake them up and re-arm the
 * broadcast device for the earliest event which is still pending.
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
struct tick_device *td;
ktime_t now, next_event;
int cpu, next_cpu = 0;
raw_spin_lock(&tick_broadcast_lock);
again:
/* Start from "nothing pending"; the scan below finds the next event */
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
cpumask_clear(tmpmask);
now = ktime_get();
/* Find all expired events */
for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64) {
cpumask_set_cpu(cpu, tmpmask);
/*
* Mark the remote cpu in the pending mask, so
* it can avoid reprogramming the cpu local
* timer in tick_broadcast_oneshot_control().
*/
cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
/* Track the earliest not yet expired event and its cpu */
next_event.tv64 = td->evtdev->next_event.tv64;
next_cpu = cpu;
}
}
/*
* Remove the current cpu from the pending mask. The event is
* delivered immediately in tick_do_broadcast() !
*/
cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
/* Take care of enforced broadcast requests */
cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
cpumask_clear(tick_broadcast_force_mask);
/*
* Sanity check. Catch the case where we try to broadcast to
* offline cpus.
*/
if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
cpumask_and(tmpmask, tmpmask, cpu_online_mask);
/*
* Wakeup the cpus which have an expired event.
*/
tick_do_broadcast(tmpmask);
/*
* Two reasons for reprogram:
*
* - The global event did not expire any CPU local
* events. This happens in dyntick mode, as the maximum PIT
* delta is quite small.
*
* - There are pending events on sleeping CPUs which were not
* in the event mask
*/
if (next_event.tv64 != KTIME_MAX) {
/*
* Rearm the broadcast device. If event expired,
* repeat the above
*/
if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
goto again;
}
raw_spin_unlock(&tick_broadcast_lock);
}
/*
 * Check whether the hrtimer based broadcast device depends on @cpu.
 *
 * Returns -EBUSY if @bc is hrtimer based, has an event pending and is
 * bound to @cpu. Returns 0 in all other cases.
 */
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER) ||
	    bc->next_event.tv64 == KTIME_MAX)
		return 0;

	if (bc->bound_on == cpu)
		return -EBUSY;

	return 0;
}
/*
 * Shut down the cpu local device @dev, unless the hrtimer based
 * broadcast device @bc still depends on it.
 */
static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if ((bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
	    (broadcast_needs_cpu(bc, smp_processor_id()) ||
	     dev->next_event.tv64 < bc->next_event.tv64))
		return;

	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}
/*
 * If the broadcast device depends on the dead cpu @deadcpu, reprogram
 * it from the calling cpu so that the pending event is not lost.
 */
static void broadcast_move_bc(int deadcpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc)
		return;

	if (!broadcast_needs_cpu(bc, deadcpu))
		return;

	/* This moves the broadcast assignment to this cpu */
	clockevents_program_event(bc, bc->next_event, 1);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
* Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
*/
int tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
ktime_t now;
int cpu, ret = 0;
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
return 0;
/*
* We are called with preemtion disabled from the depth of the
* idle code, so we can't be moved away.
*/
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
return 0;
bc = tick_broadcast_device.evtdev;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
broadcast_shutdown_local(bc, dev);
/*
* We only reprogram the broadcast timer if we
* did not mark ourself in the force mask and
* if the cpu local event is earlier than the
* broadcast event. If the current CPU is in
* the force mask, then we are going to be
* woken by the IPI right away.
*/
if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
}
/*
* If the current CPU owns the hrtimer broadcast
* mechanism, it cannot go deep idle and we remove the
* CPU from the broadcast mask. We don't have to go
* through the EXIT path as the local timer is not
* shutdown.
*/
ret = broadcast_needs_cpu(bc, cpu);
if (ret)
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
/*
* The cpu which was handling the broadcast
* timer marked this cpu in the broadcast
* pending mask and fired the broadcast
* IPI. So we are going to handle the expired
* event anyway via the broadcast IPI
* handler. No need to reprogram the timer
* with an already expired event.
*/
if (cpumask_test_and_clear_cpu(cpu,
tick_broadcast_pending_mask))
goto out;
/*
* Bail out if there is no next event.
*/
if (dev->next_event.tv64 == KTIME_MAX)
goto out;
/*
* If the pending bit is not set, then we are
* either the CPU handling the broadcast
* interrupt or we got woken by something else.
*
* We are not longer in the broadcast mask, so
* if the cpu local expiry time is already
* reached, we would reprogram the cpu local
* timer with an already expired event.
*
* This can lead to a ping-pong when we return
* to idle and therefor rearm the broadcast
* timer before the cpu local timer was able
* to fire. This happens because the forced
* reprogramming makes sure that the event
* will happen in the future and depending on
* the min_delta setting this might be far
* enough out that the ping-pong starts.
*
* If the cpu local next_event has expired
* then we know that the broadcast timer
* next_event has expired as well and
* broadcast is about to be handled. So we
* avoid reprogramming and enforce that the
* broadcast handler, which did not run yet,
* will invoke the cpu local handler.
*
* We cannot call the handler directly from
* here, because we might be in a NOHZ phase
* and we did not go through the irq_enter()
* nohz fixups.
*/
now = ktime_get();
if (dev->next_event.tv64 <= now.tv64) {
cpumask_set_cpu(cpu, tick_broadcast_force_mask);
goto out;
}
/*
* We got woken by something else. Reprogram
* the cpu local timer device.
*/
tick_program_event(dev->next_event, 1);
}
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 *
 * Clears @cpu from both the oneshot mask and the pending mask.
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}
/*
 * Set the next event of the tick device of every cpu in @mask to
 * @expires. Cpus without a tick device are skipped.
 */
static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct clock_event_device *evt;
	int cpu;

	for_each_cpu(cpu, mask) {
		evt = per_cpu(tick_cpu_device, cpu).evtdev;
		if (!evt)
			continue;
		evt->next_event = expires;
	}
}
/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:	the broadcast device, may be NULL
 *
 * Installs the oneshot broadcast handler on @bc the first time it is
 * called. On later calls only the oneshot state of the calling cpu is
 * reset.
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	/*
	 * Callers hand in tick_broadcast_device.evtdev without checking
	 * it, and there might be no broadcast device installed. Nothing
	 * can be set up in that case.
	 */
	if (!bc)
		return;

	/* Set it up only once ! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask,
						       tick_next_period);
			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
		} else
			bc->next_event.tv64 = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * to program the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}
/*
 * Select oneshot operating mode for the broadcast device
 *
 * The mode is switched even when no broadcast device is installed; the
 * device itself is only set up if there is one.
 */
void tick_broadcast_switch_to_oneshot(void)
{
struct clock_event_device *bc;
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
	unsigned int cpu = *cpup;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	tick_broadcast_clear_oneshot(cpu);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

	/* Take over the broadcast device if it depended on the dead cpu */
	broadcast_move_bc(cpu);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Check, whether the broadcast device is in one shot mode
 *
 * Returns non-zero if the broadcast tick device mode is oneshot.
 */
int tick_broadcast_oneshot_active(void)
{
return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}
/*
 * Check whether the broadcast device supports oneshot.
 *
 * Returns false as well when there is no broadcast device at all.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc)
		return false;

	return bc->features & CLOCK_EVT_FEAT_ONESHOT;
}
#endif
/*
 * Allocate (zeroed) all cpumasks used by the broadcast code.
 * NOTE(review): the return values of zalloc_cpumask_var() are not
 * checked here - confirm that allocation cannot fail at this point.
 */
void __init tick_broadcast_init(void)
{
zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}

404
kernel/time/tick-common.c Normal file
View file

@ -0,0 +1,404 @@
/*
* linux/kernel/time/tick-common.c
*
* This file contains the base functions to manage periodic tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <asm/irq_regs.h>
#include "tick-internal.h"
/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
 * Tick next event: keeps track of the tick time
 *
 * tick_next_period is the time of the next periodic tick; it is advanced
 * by tick_period on every tick of the cpu which does the timekeeping.
 * tick_period is the length of one tick (NSEC_PER_SEC / HZ nanoseconds).
 */
ktime_t tick_next_period;
ktime_t tick_period;
/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 * timekeeping lock all at once. Only the CPU which is assigned to do the
 * update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 * TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks
 * at it will take over and keep the time keeping alive. The handover
 * procedure also covers cpu hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
/*
 * Debugging: see timer_list.c
 *
 * Returns the per cpu tick device of @cpu.
 */
struct tick_device *tick_get_device(int cpu)
{
return &per_cpu(tick_cpu_device, cpu);
}
/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 *
 * Returns 0 if this cpu has no tick device or the device is not oneshot
 * capable. A oneshot capable device which is affected by C3STOP counts
 * only if a oneshot capable broadcast device is available as well.
 */
int tick_is_oneshot_available(void)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	if (!dev)
		return 0;

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return 0;

	if (dev->features & CLOCK_EVT_FEAT_C3STOP)
		return tick_broadcast_oneshot_available();

	return 1;
}
/*
 * Periodic tick
 *
 * The cpu which owns the do_timer duty additionally advances
 * tick_next_period and jiffies under jiffies_lock and updates the wall
 * time. Every cpu does its process time accounting and profiling.
 */
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
write_sequnlock(&jiffies_lock);
update_wall_time();
}
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
}
/*
 * Event handler for periodic ticks
 *
 * Handles the tick and, when the device runs in oneshot mode, programs
 * the next period by hand.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next = dev->next_event;
tick_periodic(cpu);
/* Devices not in oneshot mode need no reprogramming */
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return;
for (;;) {
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
* before we call tick_periodic() in a loop, we need
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
* which then will increment time, possibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
}
}
/*
 * Setup the device for a periodic tick
 *
 * @broadcast selects the periodic broadcast handler instead of the
 * regular periodic handler. Devices without periodic capability, and
 * all devices while oneshot broadcast is active, are driven in oneshot
 * mode and armed for the next tick period.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast);
/* Broadcast setup ? */
if (!tick_device_is_functional(dev))
return;
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else {
unsigned long seq;
ktime_t next;
/* Take a consistent snapshot of tick_next_period */
do {
seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
/* Advance period by period until programming succeeds */
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
next = ktime_add(next, tick_period);
}
}
}
/*
 * Setup the tick device
 *
 * Installs @newdev as the event device of @td. When a previous device
 * is replaced, its handler and next event are carried over so that a
 * oneshot setup can continue where the old device stopped.
 */
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
/* Only assigned (and only used) when a previous device is replaced */
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
/*
* First device setup ?
*/
if (!td->evtdev) {
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
if (!tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
else
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}
/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC;
} else {
/* Save the state of the old device and neutralize its handler */
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}
td->evtdev = newdev;
/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way. This function also returns !=0 when we keep the
* current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}
/*
 * Replace the tick device of the current cpu by @newdev and set it up.
 * If the new device is oneshot capable, notify the oneshot code.
 */
void tick_install_replacement(struct clock_event_device *newdev)
{
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
int cpu = smp_processor_id();
clockevents_exchange_device(td->evtdev, newdev);
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
}
/*
 * Check whether @newdev can serve as the tick device of @cpu.
 *
 * A device which covers exactly this cpu is always fine. A device
 * covering more cpus is only acceptable if its interrupt can be moved
 * to this cpu and the current device is not already a cpu local one.
 */
static bool tick_check_percpu(struct clock_event_device *curdev,
			      struct clock_event_device *newdev, int cpu)
{
	const struct cpumask *self = cpumask_of(cpu);

	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		return false;

	if (cpumask_equal(newdev->cpumask, self))
		return true;

	/* Check if irq affinity can be set */
	if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
		return false;

	/* Prefer an existing cpu local device */
	return !(curdev && cpumask_equal(curdev->cpumask, self));
}
static bool tick_check_preferred(struct clock_event_device *curdev,
struct clock_event_device *newdev)
{
/* Prefer oneshot capable device */
if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
return false;
if (tick_oneshot_mode_active())
return false;
}
/*
* Use the higher rated one, but prefer a CPU local device with a lower
* rating than a non-CPU local device
*/
return !curdev ||
newdev->rating > curdev->rating ||
!cpumask_equal(curdev->cpumask, newdev->cpumask);
}
/*
 * Decide whether @newdev should replace @curdev as the tick device of
 * the current CPU. @curdev may be NULL. The per cpu suitability check
 * runs first; the preference check is only evaluated when it passes.
 */
bool tick_check_replacement(struct clock_event_device *curdev,
			    struct clock_event_device *newdev)
{
	return tick_check_percpu(curdev, newdev, smp_processor_id()) &&
	       tick_check_preferred(curdev, newdev);
}
/*
* Check whether a newly registered device should become the tick device
* of the current CPU. Called with clockevents_lock held and interrupts
* disabled.
*
* A device which fails the per cpu or the preference check is handed to
* tick_install_broadcast_device() instead.
*/
void tick_check_new_device(struct clock_event_device *newdev)
{
struct clock_event_device *curdev;
struct tick_device *td;
int cpu;
cpu = smp_processor_id();
/* The device does not cover this cpu: broadcast candidate only */
if (!cpumask_test_cpu(cpu, newdev->cpumask))
goto out_bc;
td = &per_cpu(tick_cpu_device, cpu);
curdev = td->evtdev;
/* cpu local device ? */
if (!tick_check_percpu(curdev, newdev, cpu))
goto out_bc;
/* Preference decision */
if (!tick_check_preferred(curdev, newdev))
goto out_bc;
/*
* Take a reference on the owning module. On failure the device is
* dropped completely; it is not offered as a broadcast device.
*/
if (!try_module_get(newdev->owner))
return;
/*
* Replace the eventually existing device by the new
* device. If the current device is the broadcast device, do
* not give it back to the clockevents layer !
*/
if (tick_is_broadcast_device(curdev)) {
clockevents_shutdown(curdev);
curdev = NULL;
}
clockevents_exchange_device(curdev, newdev);
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
return;
out_bc:
/*
* Can the new device be used as a broadcast device ?
*/
tick_install_broadcast_device(newdev);
}
/*
 * Transfer the do_timer job away from a dying cpu.
 *
 * If *@cpup currently owns the do_timer duty, it is handed to the first
 * cpu in cpu_online_mask, or set to TICK_DO_TIMER_NONE when that mask
 * yields no valid cpu.
 *
 * Called with interrupts disabled.
 */
void tick_handover_do_timer(int *cpup)
{
	int first_online;

	if (tick_do_timer_cpu != *cpup)
		return;

	first_online = cpumask_first(cpu_online_mask);
	if (first_online < nr_cpu_ids)
		tick_do_timer_cpu = first_online;
	else
		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
}
/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a live CPU, when a CPU is dead. So we cannot
 * access the hardware device itself.
 * We just set the mode and remove it from the lists.
 */
void tick_shutdown(unsigned int *cpup)
{
	struct tick_device *tick_dev = &per_cpu(tick_cpu_device, *cpup);
	struct clock_event_device *evt = tick_dev->evtdev;

	tick_dev->mode = TICKDEV_MODE_PERIODIC;
	if (!evt)
		return;

	/*
	 * Mark the device unused first so that the clock events layer
	 * does not try to call the set mode function!
	 */
	evt->mode = CLOCK_EVT_MODE_UNUSED;
	clockevents_exchange_device(evt, NULL);
	evt->event_handler = clockevents_handle_noop;
	tick_dev->evtdev = NULL;
}
/*
 * Suspend the tick: shut down the event device of the current CPU's
 * tick device.
 */
void tick_suspend(void)
{
	struct tick_device *tick_dev = this_cpu_ptr(&tick_cpu_device);

	clockevents_shutdown(tick_dev->evtdev);
}
/*
 * Resume the tick on the current CPU. The event device is switched to
 * CLOCK_EVT_MODE_RESUME; unless tick_resume_broadcast() returned
 * non-zero, the periodic or oneshot setup is restored according to the
 * mode of the tick device.
 */
void tick_resume(void)
{
	struct tick_device *tick_dev = this_cpu_ptr(&tick_cpu_device);
	int broadcast = tick_resume_broadcast();

	clockevents_set_mode(tick_dev->evtdev, CLOCK_EVT_MODE_RESUME);
	if (broadcast)
		return;

	if (tick_dev->mode != TICKDEV_MODE_PERIODIC)
		tick_resume_oneshot();
	else
		tick_setup_periodic(tick_dev->evtdev, 0);
}
/**
* tick_init - initialize the tick control
*
* Runs tick_broadcast_init() followed by tick_nohz_init().
*/
void __init tick_init(void)
{
tick_broadcast_init();
tick_nohz_init();
}

172
kernel/time/tick-internal.h Normal file
View file

@ -0,0 +1,172 @@
/*
* tick internal variable and functions used by low/high res code
*/
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include "timekeeping.h"
extern seqlock_t jiffies_lock;
#define CS_NAME_LEN 32
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
#define TICK_DO_TIMER_BOOT -2
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void tick_check_new_device(struct clock_event_device *dev);
extern void tick_handover_do_timer(int *cpup);
extern void tick_shutdown(unsigned int *cpup);
extern void tick_suspend(void);
extern void tick_resume(void);
extern bool tick_check_replacement(struct clock_event_device *curdev,
struct clock_event_device *newdev);
extern void tick_install_replacement(struct clock_event_device *dev);
extern void clockevents_shutdown(struct clock_event_device *dev);
extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
/*
* NO_HZ / high resolution timer shared code
*/
#ifdef CONFIG_TICK_ONESHOT
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern int tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
extern int tick_broadcast_oneshot_active(void);
extern void tick_check_oneshot_broadcast_this_cpu(void);
bool tick_broadcast_oneshot_available(void);
# else /* BROADCAST */
/*
* Oneshot support without CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: the
* broadcast oneshot hooks are empty or return a constant. Setting up a
* broadcast device for oneshot mode is a BUG() in this configuration.
*/
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
/* Reports oneshot as available in this configuration */
static inline bool tick_broadcast_oneshot_available(void) { return true; }
# endif /* !BROADCAST */
#else /* !ONESHOT */
/*
* Stubs for kernels built without CONFIG_TICK_ONESHOT. Entry points
* which set up or resume oneshot mode are a BUG() here; the remaining
* ones are empty or return a constant.
*/
static inline
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt)
{
BUG();
}
static inline void tick_resume_oneshot(void)
{
BUG();
}
/* Nothing to program without oneshot support; always returns 0 */
static inline int tick_program_event(ktime_t expires, int force)
{
return 0;
}
static inline void tick_oneshot_notify(void) { }
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
return 0;
}
static inline int tick_broadcast_oneshot_active(void) { return 0; }
/* Reports oneshot as unavailable in this configuration */
static inline bool tick_broadcast_oneshot_available(void) { return false; }
#endif /* !TICK_ONESHOT */
/*
* NO_HZ_FULL internal: tick_nohz_init() is only provided externally when
* CONFIG_NO_HZ_FULL is set, otherwise it is an empty inline.
*/
#ifdef CONFIG_NO_HZ_FULL
extern void tick_nohz_init(void);
# else
static inline void tick_nohz_init(void) { }
#endif
/*
* Broadcasting support
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern void tick_install_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
extern void tick_suspend_broadcast(void);
extern int tick_resume_broadcast(void);
extern void tick_broadcast_init(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
#else /* !BROADCAST */
/*
* Stubs for kernels built without CONFIG_GENERIC_CLOCKEVENTS_BROADCAST:
* no device is ever installed as, or treated as, a broadcast device.
*/
static inline void tick_install_broadcast_device(struct clock_event_device *dev)
{
}
static inline int tick_is_broadcast_device(struct clock_event_device *dev)
{
return 0;
}
static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
int cpu)
{
return 0;
}
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
static inline void tick_suspend_broadcast(void) { }
static inline int tick_resume_broadcast(void) { return 0; }
static inline void tick_broadcast_init(void) { }
/* Frequency updates of a broadcast device always fail with -ENODEV */
static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
u32 freq) { return -ENODEV; }
/*
* Set the periodic handler in non broadcast mode. The broadcast argument
* is ignored; the handler is always tick_handle_periodic.
*/
static inline void tick_set_periodic_handler(struct clock_event_device *dev,
int broadcast)
{
dev->event_handler = tick_handle_periodic;
}
#endif /* !BROADCAST */
/*
 * Tell a functional device from a dummy one: returns 1 unless the
 * device carries CLOCK_EVT_FEAT_DUMMY, in which case it returns 0.
 */
static inline int tick_device_is_functional(struct clock_event_device *dev)
{
	return (dev->features & CLOCK_EVT_FEAT_DUMMY) == 0;
}
int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);

116
kernel/time/tick-oneshot.c Normal file
View file

@ -0,0 +1,116 @@
/*
* linux/kernel/time/tick-oneshot.c
*
* This file contains functions which manage high resolution tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include "tick-internal.h"
/**
 * tick_program_event - program the tick device of the current CPU
 * @expires:	expiry time passed on to clockevents_program_event()
 * @force:	force flag passed on to clockevents_program_event()
 *
 * Returns the result of clockevents_program_event().
 */
int tick_program_event(ktime_t expires, int force)
{
	return clockevents_program_event(__this_cpu_read(tick_cpu_device.evtdev),
					 expires, force);
}
/**
 * tick_resume_oneshot - resume oneshot mode
 *
 * Puts the tick device of the current CPU back into oneshot mode and
 * programs it, with the force flag set, for the current time.
 */
void tick_resume_oneshot(void)
{
	struct clock_event_device *evt;

	evt = __this_cpu_read(tick_cpu_device.evtdev);
	clockevents_set_mode(evt, CLOCK_EVT_MODE_ONESHOT);
	clockevents_program_event(evt, ktime_get(), true);
}
/**
* tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
* @newdev: device to switch to oneshot mode
* @handler: event handler to install on @newdev
* @next_event: expiry time the device is programmed for
*/
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t next_event)
{
/* Install the handler before the device is switched and armed */
newdev->event_handler = handler;
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
clockevents_program_event(newdev, next_event, true);
}
/**
 * tick_switch_to_oneshot - switch to oneshot mode
 * @handler:	event handler to install on the tick device
 *
 * Returns 0 on success. Returns -EINVAL and logs the reason when the
 * current CPU has no tick device, the device is not functional or it
 * lacks CLOCK_EVT_FEAT_ONESHOT.
 */
int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *dev = td->evtdev;

	if (dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
	    tick_device_is_functional(dev)) {
		td->mode = TICKDEV_MODE_ONESHOT;
		dev->event_handler = handler;
		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
		tick_broadcast_switch_to_oneshot();
		return 0;
	}

	printk(KERN_INFO "Clockevents: could not switch to one-shot mode:");
	if (!dev)
		printk(" no tick device\n");
	else if (!tick_device_is_functional(dev))
		printk(" %s is not functional.\n", dev->name);
	else
		printk(" %s does not support one-shot mode.\n", dev->name);

	return -EINVAL;
}
/**
 * tick_oneshot_mode_active - check whether the system is in oneshot mode
 *
 * returns 1 when the tick device of the current CPU is in
 * TICKDEV_MODE_ONESHOT (either nohz or highres enabled), otherwise 0.
 */
int tick_oneshot_mode_active(void)
{
	unsigned long irqflags;
	int oneshot;

	local_irq_save(irqflags);
	oneshot = (__this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT);
	local_irq_restore(irqflags);

	return oneshot;
}
#ifdef CONFIG_HIGH_RES_TIMERS
/**
* tick_init_highres - switch to high resolution mode
*
* Switches the tick device to oneshot mode with hrtimer_interrupt as the
* event handler and returns the result of tick_switch_to_oneshot().
*
* Called with interrupts disabled.
*/
int tick_init_highres(void)
{
return tick_switch_to_oneshot(hrtimer_interrupt);
}
#endif

1252
kernel/time/tick-sched.c Normal file

File diff suppressed because it is too large Load diff

788
kernel/time/time.c Normal file
View file

@ -0,0 +1,788 @@
/*
* linux/kernel/time.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* This file contains the interface functions for the various
* time related system calls: time, stime, gettimeofday, settimeofday,
* adjtime
*/
/*
* Modification history kernel/time.c
*
* 1993-09-02 Philip Gladstone
* Created file with time related functions from sched/core.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
* 1995-08-13 Torsten Duwe
* kernel PLL updated to 1994-12-13 specs (rfc-1589)
* 1999-01-16 Ulrich Windl
* Introduced error checking for many cases in adjtimex().
* Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
* (Even though the technical memorandum forbids it)
* 2004-07-14 Christoph Lameter
* Added getnstimeofday to allow the posix timer functions to return
* with nanosecond accuracy
*/
#include <linux/export.h>
#include <linux/timex.h>
#include <linux/capability.h>
#include <linux/timekeeper_internal.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/fs.h>
#include <linux/math64.h>
#include <linux/ptrace.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "timeconst.h"
#include "timekeeping.h"
/*
* The timezone where the local system is located. Used as a default by some
* programs who obtain this value by using gettimeofday.
*/
struct timezone sys_tz;
EXPORT_SYMBOL(sys_tz);
#ifdef __ARCH_WANT_SYS_TIME
/*
* sys_time() can be implemented in user-level using
* sys_gettimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
SYSCALL_DEFINE1(time, time_t __user *, tloc)
{
	time_t now = get_seconds();

	/* Storing the value through @tloc is optional */
	if (tloc && put_user(now, tloc))
		return -EFAULT;

	force_successful_syscall_return();
	return now;
}
/*
 * sys_stime() can be implemented in user-level using
 * sys_settimeofday(). Is this for backwards compatibility? If so,
 * why not move it into the appropriate arch directory (for those
 * architectures that need it).
 *
 * Sets the clock to *@tptr seconds with the nanosecond part zeroed.
 * Returns -EFAULT when the value cannot be read from user space, the
 * error of security_settime() when that check fails, and otherwise the
 * result of do_settimeofday(), so that a rejected time value is
 * reported to the caller instead of being treated as success.
 */
SYSCALL_DEFINE1(stime, time_t __user *, tptr)
{
	struct timespec tv;
	int err;

	if (get_user(tv.tv_sec, tptr))
		return -EFAULT;

	tv.tv_nsec = 0;

	err = security_settime(&tv, NULL);
	if (err)
		return err;

	/* Propagate the result, as do_sys_settimeofday() does */
	return do_settimeofday(&tv);
}
#endif /* __ARCH_WANT_SYS_TIME */
/*
 * gettimeofday: copy the current time to @tv and the system timezone to
 * @tz. Either pointer may be NULL, in which case that part is skipped.
 * Returns 0, or -EFAULT when a copy to user space fails.
 */
SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
		struct timezone __user *, tz)
{
	if (likely(tv != NULL)) {
		struct timeval now;

		do_gettimeofday(&now);
		if (copy_to_user(tv, &now, sizeof(now)))
			return -EFAULT;
	}

	if (unlikely(tz != NULL) &&
	    copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
		return -EFAULT;

	return 0;
}
/*
* Indicates if there is an offset between the system clock and the hardware
* clock/persistent clock/rtc.
*/
int persistent_clock_is_local;
/*
* Adjust the time obtained from the CMOS to be UTC time instead of
* local time.
*
* This is ugly, but preferable to the alternatives. Otherwise we
* would either need to write a program to do it in /etc/rc (and risk
* confusion if the program gets run more than once; it would also be
* hard to make the program warp the clock precisely n hours) or
* compile in the timezone information into the kernel. Bad, bad....
*
* - TYT, 1992-01-01
*
* The best thing to do is to keep the CMOS clock in universal time (UTC)
* as real UNIX machines always do it. This avoids all headaches about
* daylight saving times and warping kernel clocks.
*/
static inline void warp_clock(void)
{
	struct timespec adjust;

	/* No offset configured: nothing to warp */
	if (sys_tz.tz_minuteswest == 0)
		return;

	persistent_clock_is_local = 1;
	adjust.tv_sec = sys_tz.tz_minuteswest * 60;
	adjust.tv_nsec = 0;
	timekeeping_inject_offset(&adjust);
}
/*
* In case for some reason the CMOS clock has not already been running
* in UTC, but in some local time: The first time we set the timezone,
* we will warp the clock so that it is ticking UTC time instead of
* local time. Presumably, if someone is setting the timezone then we
* are running in an environment where the programs understand about
* timezones. This should be done at boot time in the /etc/rc script,
* as soon as possible, so that the clock can be set right. Otherwise,
* various programs will get confused when the clock gets warped.
*/
int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
{
	static int firsttime = 1;
	int error;

	if (tv && !timespec_valid(tv))
		return -EINVAL;

	error = security_settime(tv, tz);
	if (error)
		return error;

	if (tz) {
		sys_tz = *tz;
		update_vsyscall_tz();
		/*
		 * Only the first timezone update may warp the clock, and
		 * only when no time is set in the same call.
		 */
		if (firsttime) {
			firsttime = 0;
			if (!tv)
				warp_clock();
		}
	}

	return tv ? do_settimeofday(tv) : 0;
}
/*
 * settimeofday: fetch the optional @tv and @tz arguments from user space
 * and pass them on to do_sys_settimeofday(). Returns -EFAULT when a copy
 * from user space fails and -EINVAL when @tv fails timeval_valid().
 */
SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
		struct timezone __user *, tz)
{
	struct timespec new_ts;
	struct timezone new_tz;

	if (tv) {
		struct timeval user_tv;

		if (copy_from_user(&user_tv, tv, sizeof(*tv)))
			return -EFAULT;

		if (!timeval_valid(&user_tv))
			return -EINVAL;

		new_ts.tv_sec = user_tv.tv_sec;
		new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
	}

	if (tz && copy_from_user(&new_tz, tz, sizeof(*tz)))
		return -EFAULT;

	return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
}
/*
 * adjtimex: run do_adjtimex() on a kernel copy of the user supplied
 * struct timex and copy the updated structure back. Returns the result
 * of do_adjtimex(), or -EFAULT when either copy fails.
 */
SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
{
	struct timex txc;		/* Local copy of parameter */
	int ret;

	/*
	 * Work on a kernel copy of the user structure. Bear in mind
	 * that the structures may change.
	 */
	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
		return -EFAULT;

	ret = do_adjtimex(&txc);

	if (copy_to_user(txc_p, &txc, sizeof(struct timex)))
		return -EFAULT;

	return ret;
}
/**
 * current_fs_time - Return FS time
 * @sb: Superblock.
 *
 * Return the current kernel time truncated to the time granularity
 * (s_time_gran) of the superblock.
 */
struct timespec current_fs_time(struct super_block *sb)
{
	return timespec_trunc(current_kernel_time(), sb->s_time_gran);
}
EXPORT_SYMBOL(current_fs_time);
/*
* Convert jiffies to milliseconds and back.
*
* Avoid unnecessary multiplications/divisions in the
* two most common HZ cases:
*/
/*
* Convert @j jiffies to milliseconds. The conversion is selected at
* compile time from the relation between HZ and MSEC_PER_SEC.
*/
unsigned int jiffies_to_msecs(const unsigned long j)
{
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
/* HZ divides MSEC_PER_SEC evenly: plain multiplication */
return (MSEC_PER_SEC / HZ) * j;
#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
/* HZ is a multiple of MSEC_PER_SEC: divide, rounding up */
return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
#else
/* Generic case, using the HZ_TO_MSEC_* conversion constants */
# if BITS_PER_LONG == 32
return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
# else
return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
# endif
#endif
}
EXPORT_SYMBOL(jiffies_to_msecs);
/*
* Convert @j jiffies to microseconds. The conversion is selected at
* compile time from the relation between HZ and USEC_PER_SEC.
*/
unsigned int jiffies_to_usecs(const unsigned long j)
{
#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
/* HZ divides USEC_PER_SEC evenly: plain multiplication */
return (USEC_PER_SEC / HZ) * j;
#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
/* HZ is a multiple of USEC_PER_SEC: divide, rounding up */
return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
#else
/* Generic case, using the HZ_TO_USEC_* conversion constants */
# if BITS_PER_LONG == 32
return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
# else
return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
# endif
#endif
}
EXPORT_SYMBOL(jiffies_to_usecs);
/**
* timespec_trunc - Truncate timespec to a granularity
* @t: Timespec
* @gran: Granularity in ns.
*
* Truncate a timespec to a granularity. gran must be smaller than a second.
* Always rounds down.
*
* This function should be only used for timestamps returned by
* current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
* it doesn't handle the better resolution of the latter.
*/
struct timespec timespec_trunc(struct timespec t, unsigned gran)
{
	/*
	 * Division is pretty slow so avoid it for common cases.
	 * Currently current_kernel_time() never returns better than
	 * jiffies resolution. Exploit that: a granularity at or below
	 * one jiffy leaves the value untouched.
	 */
	if (gran > jiffies_to_usecs(1) * 1000) {
		if (gran == 1000000000)
			t.tv_nsec = 0;
		else
			t.tv_nsec -= t.tv_nsec % gran;
	}

	return t;
}
EXPORT_SYMBOL(timespec_trunc);
/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
* => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
*
* [For the Julian calendar (which was used in Russia before 1917,
* Britain & colonies before 1752, anywhere else before 1582,
* and is still in use by some communities) leave out the
* -year/100+year/400 terms, and add 10.]
*
* This algorithm was first published by Gauss (I think).
*
* WARNING: this function will overflow on 2106-02-07 06:28:16 on
* machines where long is 32-bit! (However, as time_t is signed, we
* will already get problems at other places on 2038-01-19 03:14:08)
*/
unsigned long
mktime(const unsigned int year0, const unsigned int mon0,
       const unsigned int day, const unsigned int hour,
       const unsigned int min, const unsigned int sec)
{
	unsigned int month = mon0;
	unsigned int yr = year0;
	unsigned long days, hours, minutes;

	/* 1..12 -> 11,12,1..10: puts Feb last since it has the leap day */
	month -= 2;
	if ((int) month <= 0) {
		month += 12;
		yr -= 1;	/* Jan and Feb count towards the previous year */
	}

	/* Days since 1970-01-01 */
	days = (unsigned long)
		(yr/4 - yr/100 + yr/400 + 367*month/12 + day) +
		yr*365 - 719499;

	hours = days * 24 + hour;
	minutes = hours * 60 + min;

	return minutes * 60 + sec;
}
EXPORT_SYMBOL(mktime);
/**
* set_normalized_timespec - set timespec sec and nsec parts and normalize
*
* @ts: pointer to timespec variable to be set
* @sec: seconds to set
* @nsec: nanoseconds to set
*
* Set seconds and nanoseconds field of a timespec variable and
* normalize to the timespec storage format
*
* Note: The tv_nsec part is always in the range of
* 0 <= tv_nsec < NSEC_PER_SEC
* For negative values only the tv_sec field is negative !
*/
void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
{
/* Carry whole seconds out of an nsec value that is too large */
while (nsec >= NSEC_PER_SEC) {
/*
* The following asm() prevents the compiler from
* optimising this loop into a modulo operation. See
* also __iter_div_u64_rem() in include/linux/time.h
*/
asm("" : "+rm"(nsec));
nsec -= NSEC_PER_SEC;
++sec;
}
/* Borrow from sec until a negative nsec value is non-negative */
while (nsec < 0) {
asm("" : "+rm"(nsec));
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
EXPORT_SYMBOL(set_normalized_timespec);
/**
* ns_to_timespec - Convert nanoseconds to timespec
* @nsec: the nanoseconds value to be converted
*
* Returns the timespec representation of the nsec parameter.
*/
struct timespec ns_to_timespec(const s64 nsec)
{
struct timespec ts;
s32 rem;
if (!nsec)
return (struct timespec) {0, 0};
ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
if (unlikely(rem < 0)) {
ts.tv_sec--;
rem += NSEC_PER_SEC;
}
ts.tv_nsec = rem;
return ts;
}
EXPORT_SYMBOL(ns_to_timespec);
/**
* ns_to_timeval - Convert nanoseconds to timeval
* @nsec: the nanoseconds value to be converted
*
* Returns the timeval representation of the nsec parameter.
*/
struct timeval ns_to_timeval(const s64 nsec)
{
struct timespec ts = ns_to_timespec(nsec);
struct timeval tv;
tv.tv_sec = ts.tv_sec;
tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
return tv;
}
EXPORT_SYMBOL(ns_to_timeval);
#if BITS_PER_LONG == 32
/**
* set_normalized_timespec64 - set timespec64 sec and nsec parts and normalize
*
* @ts: pointer to timespec64 variable to be set
* @sec: seconds to set
* @nsec: nanoseconds to set
*
* Set seconds and nanoseconds field of a timespec64 variable and
* normalize to the timespec64 storage format
*
* Note: The tv_nsec part is always in the range of
* 0 <= tv_nsec < NSEC_PER_SEC
* For negative values only the tv_sec field is negative !
*/
void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec)
{
/* Carry whole seconds out of an nsec value that is too large */
while (nsec >= NSEC_PER_SEC) {
/*
* The following asm() prevents the compiler from
* optimising this loop into a modulo operation. See
* also __iter_div_u64_rem() in include/linux/time.h
*/
asm("" : "+rm"(nsec));
nsec -= NSEC_PER_SEC;
++sec;
}
/* Borrow from sec until a negative nsec value is non-negative */
while (nsec < 0) {
asm("" : "+rm"(nsec));
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
EXPORT_SYMBOL(set_normalized_timespec64);
/**
* ns_to_timespec64 - Convert nanoseconds to timespec64
* @nsec: the nanoseconds value to be converted
*
* Returns the timespec64 representation of the nsec parameter.
*/
struct timespec64 ns_to_timespec64(const s64 nsec)
{
struct timespec64 ts;
s32 rem;
if (!nsec)
return (struct timespec64) {0, 0};
ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
if (unlikely(rem < 0)) {
ts.tv_sec--;
rem += NSEC_PER_SEC;
}
ts.tv_nsec = rem;
return ts;
}
EXPORT_SYMBOL(ns_to_timespec64);
#endif
/*
* When we convert to jiffies then we interpret incoming values
* the following way:
*
* - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
*
* - 'too large' values [that would result in larger than
* MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
*
* - all other values are converted to jiffies by either multiplying
* the input value by a factor or dividing it with a factor
*
* We must also be careful about 32-bit overflows.
*/
/*
* Convert @m milliseconds to jiffies. Values which are negative when
* read as int, and values too large to represent, yield
* MAX_JIFFY_OFFSET.
*/
unsigned long msecs_to_jiffies(const unsigned int m)
{
/*
* Negative value, means infinite timeout:
*/
if ((int)m < 0)
return MAX_JIFFY_OFFSET;
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
/*
* HZ is equal to or smaller than 1000, and 1000 is a nice
* round multiple of HZ, divide with the factor between them,
* but round upwards:
*/
return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
/*
* HZ is larger than 1000, and HZ is a nice round multiple of
* 1000 - simply multiply with the factor between them.
*
* But first make sure the multiplication result cannot
* overflow:
*/
if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
return MAX_JIFFY_OFFSET;
return m * (HZ / MSEC_PER_SEC);
#else
/*
* Generic case - multiply, round and divide. But first
* check that if we are doing a net multiplication, that
* we wouldn't overflow:
*/
if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
return MAX_JIFFY_OFFSET;
/* Scaled multiply, add the rounding adjustment, shift back down */
return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
>> MSEC_TO_HZ_SHR32;
#endif
}
EXPORT_SYMBOL(msecs_to_jiffies);
/*
* Convert @u microseconds to jiffies. Values larger than
* jiffies_to_usecs(MAX_JIFFY_OFFSET) yield MAX_JIFFY_OFFSET.
*/
unsigned long usecs_to_jiffies(const unsigned int u)
{
if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
return MAX_JIFFY_OFFSET;
#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
/* HZ divides USEC_PER_SEC evenly: divide, rounding up */
return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
/* HZ is a multiple of USEC_PER_SEC: plain multiplication */
return u * (HZ / USEC_PER_SEC);
#else
/* Generic case: scaled multiply, rounding adjustment, shift */
return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
>> USEC_TO_HZ_SHR32;
#endif
}
EXPORT_SYMBOL(usecs_to_jiffies);
/*
* The TICK_NSEC - 1 rounds up the value to the next resolution. Note
* that a remainder subtract here would not do the right thing as the
* resolution values don't fall on second boundaries. I.e. the line:
* nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
* Note that due to the small error in the multiplier here, this
* rounding is incorrect for sufficiently large values of tv_nsec, but
* well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're
* OK.
*
* Rather, we just shift the bits off the right.
*
* The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
* value to a scaled second value.
*/
static unsigned long
__timespec_to_jiffies(unsigned long sec, long nsec)
{
/* Round up to the next tick */
nsec = nsec + TICK_NSEC - 1;
/* Clamp at MAX_SEC_IN_JIFFIES; the nsec part is dropped then */
if (sec >= MAX_SEC_IN_JIFFIES){
sec = MAX_SEC_IN_JIFFIES;
nsec = 0;
}
return (((u64)sec * SEC_CONVERSION) +
(((u64)nsec * NSEC_CONVERSION) >>
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}
/*
* Convert the timespec @value to jiffies via __timespec_to_jiffies().
*/
unsigned long
timespec_to_jiffies(const struct timespec *value)
{
return __timespec_to_jiffies(value->tv_sec, value->tv_nsec);
}
EXPORT_SYMBOL(timespec_to_jiffies);
void
jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
{
/*
* Convert jiffies to nanoseconds and separate with
* one divide.
*/
u32 rem;
value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
NSEC_PER_SEC, &rem);
value->tv_nsec = rem;
}
EXPORT_SYMBOL(jiffies_to_timespec);
/*
* We could use a similar algorithm to timespec_to_jiffies (with a
* different multiplier for usec instead of nsec). But this has a
* problem with rounding: we can't exactly add TICK_NSEC - 1 to the
* usec value, since it's not necessarily integral.
*
* We could instead round in the intermediate scaled representation
* (i.e. in units of 1/2^(large scale) jiffies) but that's also
* perilous: the scaling introduces a small positive error, which
* combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
* units to the intermediate before shifting) leads to accidental
* overflow and overestimates.
*
* At the cost of one additional multiplication by a constant, just
* use the timespec implementation.
*/
unsigned long
timeval_to_jiffies(const struct timeval *value)
{
/* Scale tv_usec to nanoseconds and reuse the timespec conversion */
return __timespec_to_jiffies(value->tv_sec,
value->tv_usec * NSEC_PER_USEC);
}
EXPORT_SYMBOL(timeval_to_jiffies);
void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
{
/*
* Convert jiffies to nanoseconds and separate with
* one divide.
*/
u32 rem;
value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
NSEC_PER_SEC, &rem);
value->tv_usec = rem / NSEC_PER_USEC;
}
EXPORT_SYMBOL(jiffies_to_timeval);
/*
* Convert jiffies/jiffies_64 to clock_t and back.
*/
/*
* Convert @x jiffies (HZ based) to clock_t units (USER_HZ based).
*/
clock_t jiffies_to_clock_t(unsigned long x)
{
#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
/* A tick is a whole number of clock_t units: scale by the HZ ratio */
# if HZ < USER_HZ
return x * (USER_HZ / HZ);
# else
return x / (HZ / USER_HZ);
# endif
#else
/* Otherwise go through nanoseconds */
return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
#endif
}
EXPORT_SYMBOL(jiffies_to_clock_t);
/*
* Convert @x clock_t units (USER_HZ based) to jiffies (HZ based).
* Returns ~0UL when @x is at or above the limit checked in each branch.
*/
unsigned long clock_t_to_jiffies(unsigned long x)
{
#if (HZ % USER_HZ)==0
if (x >= ~0UL / (HZ / USER_HZ))
return ~0UL;
return x * (HZ / USER_HZ);
#else
/* Don't worry about loss of precision here .. */
if (x >= ~0UL / HZ * USER_HZ)
return ~0UL;
/* .. but do try to contain it here */
return div_u64((u64)x * HZ, USER_HZ);
#endif
}
EXPORT_SYMBOL(clock_t_to_jiffies);
/*
* 64-bit variant of jiffies_to_clock_t(): convert @x jiffies to clock_t
* units (USER_HZ based).
*/
u64 jiffies_64_to_clock_t(u64 x)
{
#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
# if HZ < USER_HZ
x = div_u64(x * USER_HZ, HZ);
# elif HZ > USER_HZ
x = div_u64(x, HZ / USER_HZ);
# else
/* Nothing to do */
# endif
#else
/*
* There are better ways that don't overflow early,
* but even this doesn't overflow in hundreds of years
* in 64 bits, so..
*/
x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
#endif
return x;
}
EXPORT_SYMBOL(jiffies_64_to_clock_t);
/*
* Convert @x nanoseconds to clock_t units (USER_HZ based).
*/
u64 nsec_to_clock_t(u64 x)
{
#if (NSEC_PER_SEC % USER_HZ) == 0
/* USER_HZ divides NSEC_PER_SEC evenly: a single division */
return div_u64(x, NSEC_PER_SEC / USER_HZ);
#elif (USER_HZ % 512) == 0
/* Reduce both sides by 512 before dividing */
return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
#else
/*
* max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
* overflow after 64.99 years.
* exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
*/
return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
#endif
}
/**
* nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
* @n: nsecs in u64
*
* Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
* And this doesn't return MAX_JIFFY_OFFSET since this function is designed
* for scheduler, not for use in device drivers to calculate timeout value.
*
* note:
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*
* Returns the number of jiffies as u64.
*/
u64 nsecs_to_jiffies64(u64 n)
{
#if (NSEC_PER_SEC % HZ) == 0
/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
return div_u64(n, NSEC_PER_SEC / HZ);
#elif (HZ % 512) == 0
/* overflow after 292 years if HZ = 1024 */
return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
#else
/*
* Generic case - optimized for cases where HZ is a multiple of 3.
* overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
*/
return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
#endif
}
/**
* nsecs_to_jiffies - Convert nsecs in u64 to jiffies
*
* @n: nsecs in u64
*
* Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
* And this doesn't return MAX_JIFFY_OFFSET since this function is designed
* for scheduler, not for use in device drivers to calculate timeout value.
*
* The result of nsecs_to_jiffies64() is cast to unsigned long.
*
* note:
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
unsigned long nsecs_to_jiffies(u64 n)
{
return (unsigned long)nsecs_to_jiffies64(n);
}
EXPORT_SYMBOL_GPL(nsecs_to_jiffies);
/*
 * Add two timespec values and do a safety check for overflow.
 * It's assumed that both values are valid (>= 0).
 *
 * The seconds are summed in unsigned arithmetic: a sum that exceeds the
 * range of time_t then wraps in a well-defined way instead of triggering
 * signed-overflow undefined behaviour, which would allow the compiler to
 * discard the check below. A wrapped sum is smaller than either operand,
 * so the comparison detects it and the seconds saturate to TIME_T_MAX.
 *
 * Returns the normalized sum, with tv_sec clamped to TIME_T_MAX on overflow.
 */
struct timespec timespec_add_safe(const struct timespec lhs,
				  const struct timespec rhs)
{
	struct timespec res;

	set_normalized_timespec(&res,
				(time_t)((unsigned long)lhs.tv_sec +
					 (unsigned long)rhs.tv_sec),
				lhs.tv_nsec + rhs.tv_nsec);

	if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
		res.tv_sec = TIME_T_MAX;

	return res;
}

108
kernel/time/timeconst.bc Normal file
View file

@ -0,0 +1,108 @@
/* Work in integers: with scale=0, "/" truncates and "%" is the integer
   remainder. Every helper below relies on this. */
scale=0
/* Greatest common divisor of a and b (Euclid's algorithm): replace
   (a, b) by (b, a % b) until b reaches zero; a then holds the result. */
define gcd(a,b) {
auto t;
while (b) {
t = b;
b = a % b;
a = t;
}
return a;
}
/* Division by reciprocal multiplication.
   Returns ceil(2^b * n / d): the ratio n/d as a fixed-point multiplier
   with b fractional bits. Adding d-1 before the truncating division is
   what rounds the quotient up. */
define fmul(b,n,d) {
return (2^b*n+d-1)/d;
}
/* Adjustment factor when a ceiling value is used. Use as:
   (imul * n) + ((fmulxx * n + fadjxx) >> xx)
   d is first reduced by gcd(n,d); the result is then
   floor(2^b * (d-1) / d). */
define fadj(b,n,d) {
auto v;
d = d/gcd(n,d);
v = 2^b*(d-1)/d;
return v;
}
/* Compute the appropriate mul/adj values as well as a shift count,
   which brings the mul value into the range 2^(b-1) <= x < 2^b. Such
   a shift value will be correct in the signed integer range and off
   by at most one in the upper half of the unsigned range.
   Returns the smallest shift s for which fmul(s,n,d) >= 2^(b-1). */
define fmuls(b,n,d) {
auto s, m;
/* The loop condition is the constant 1: the only exit is the return
   inside the body. */
for (s = 0; 1; s++) {
m = fmul(s,n,d);
if (m >= 2^(b-1))
return s;
}
/* Not reached. */
return 0;
}
/* Print a C header of HZ <-> msec/usec conversion constants for the
   given hz. For each direction it emits a 32-bit reciprocal multiplier
   (MUL32), the matching rounding adjustment (ADJ32), the shift count
   (SHR32), and the exact ratio reduced to lowest terms (NUM/DEN).
   The generated header refuses to compile if HZ differs from hz.
   The variable hz used by the final call is not set in this file; it
   must be defined by whatever is fed to bc before this script. */
define timeconst(hz) {
print "/* Automatically generated by kernel/timeconst.bc */\n"
print "/* Time conversion constants for HZ == ", hz, " */\n"
print "\n"
print "#ifndef KERNEL_TIMECONST_H\n"
print "#define KERNEL_TIMECONST_H\n\n"
print "#include <linux/param.h>\n"
print "#include <linux/types.h>\n\n"
print "#if HZ != ", hz, "\n"
print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n"
print "#endif\n\n"
if (hz < 2) {
print "#error Totally bogus HZ value!\n"
} else {
/* jiffies -> milliseconds. obase is switched to 16 only while the
   0x-prefixed constants are printed, and back to 10 for the shift. */
s=fmuls(32,1000,hz)
obase=16
print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n"
print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n"
obase=10
print "#define HZ_TO_MSEC_SHR32\t", s, "\n"
/* milliseconds -> jiffies */
s=fmuls(32,hz,1000)
obase=16
print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n"
print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n"
obase=10
print "#define MSEC_TO_HZ_SHR32\t", s, "\n"
/* Exact msec ratio in lowest terms. */
obase=10
cd=gcd(hz,1000)
print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n"
print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n"
print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n"
print "\n"
/* jiffies -> microseconds */
s=fmuls(32,1000000,hz)
obase=16
print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n"
print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n"
obase=10
print "#define HZ_TO_USEC_SHR32\t", s, "\n"
/* microseconds -> jiffies */
s=fmuls(32,hz,1000000)
obase=16
print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n"
print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n"
obase=10
print "#define USEC_TO_HZ_SHR32\t", s, "\n"
/* Exact usec ratio in lowest terms. */
obase=10
cd=gcd(hz,1000000)
print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n"
print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n"
print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n"
print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n"
print "\n"
print "#endif /* KERNEL_TIMECONST_H */\n"
}
/* Stop bc once the header has been printed. */
halt
}
/* Entry point: generate the header for the externally supplied hz. */
timeconst(hz)

127
kernel/time/timeconv.c Normal file
View file

@ -0,0 +1,127 @@
/*
* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
* This file is part of the GNU C Library.
* Contributed by Paul Eggert (eggert@twinsun.com).
*
* The GNU C Library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* The GNU C Library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with the GNU C Library; see the file COPYING.LIB. If not,
* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Converts the calendar time to broken-down time representation
* Based on code from glibc-2.6
*
* 2009-7-14:
* Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
*/
#include <linux/time.h>
#include <linux/module.h>
/*
 * Gregorian leap-year test: returns 1 when YEAR is a leap year and 0
 * otherwise. A year divisible by 4 is a leap year unless it is a century
 * year, in which case it must also be divisible by 400.
 */
static int __isleap(long year)
{
	if (year % 4 != 0)
		return 0;
	if (year % 100 != 0)
		return 1;
	return year % 400 == 0;
}
/*
 * Division for long that rounds the quotient down instead of toward zero:
 * take the C quotient and step it down by one whenever the C remainder is
 * negative.
 */
static long math_div(long a, long b)
{
	long quotient = a / b;

	if (a % b < 0)
		quotient--;

	return quotient;
}
/*
 * Number of leap years in the range [y1, y2).
 *
 * For each bound the count of leap years strictly before it is computed
 * (multiples of 4, minus multiples of 100, plus multiples of 400) and the
 * two counts are subtracted. Swapping the arguments negates the result.
 */
static long leaps_between(long y1, long y2)
{
	long before_y1 = y1 - 1;
	long before_y2 = y2 - 1;
	long leaps1, leaps2;

	leaps1 = math_div(before_y1, 4) - math_div(before_y1, 100)
		+ math_div(before_y1, 400);
	leaps2 = math_div(before_y2, 4) - math_div(before_y2, 100)
		+ math_div(before_y2, 400);

	return leaps2 - leaps1;
}
/*
 * How many days come before each month (0-12).
 * First index: 0 for a normal year, 1 for a leap year (the value returned by
 * __isleap()). Second index: month number; entry 12 holds the length of the
 * whole year.
 */
static const unsigned short __mon_yday[2][13] = {
/* Normal years. */
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
/* Leap years. */
{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
};
/* Seconds in one hour and in one 24-hour day. */
#define SECS_PER_HOUR (60 * 60)
#define SECS_PER_DAY (SECS_PER_HOUR * 24)
/**
 * time_to_tm - converts the calendar time to local broken-down time
 *
 * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970,
 *             Coordinated Universal Time (UTC).
 * @offset: offset in seconds added to totalsecs.
 * @result: pointer to struct tm variable to receive broken-down time
 *
 * Writes tm_hour, tm_min, tm_sec, tm_wday, tm_year (years since 1900),
 * tm_yday, tm_mon (0-11) and tm_mday (starting at 1). No other member of
 * @result is written here.
 */
void time_to_tm(time_t totalsecs, int offset, struct tm *result)
{
long days, rem, y;
const unsigned short *ip;
/* Split into whole days and the seconds left over within the day. */
days = totalsecs / SECS_PER_DAY;
rem = totalsecs % SECS_PER_DAY;
/* Apply the offset, then carry any underflow/overflow back into days. */
rem += offset;
while (rem < 0) {
rem += SECS_PER_DAY;
--days;
}
while (rem >= SECS_PER_DAY) {
rem -= SECS_PER_DAY;
++days;
}
result->tm_hour = rem / SECS_PER_HOUR;
rem %= SECS_PER_HOUR;
result->tm_min = rem / 60;
result->tm_sec = rem % 60;
/* January 1, 1970 was a Thursday. */
result->tm_wday = (4 + days) % 7;
/* C's % can yield a negative value for negative days; fold into 0..6. */
if (result->tm_wday < 0)
result->tm_wday += 7;
y = 1970;
/* Iterate until days is a valid day-of-year index for year y. */
while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
/* Guess a corrected year, assuming 365 days per year. */
long yg = y + math_div(days, 365);
/* Adjust DAYS and Y to match the guessed year. */
days -= (yg - y) * 365 + leaps_between(y, yg);
y = yg;
}
result->tm_year = y - 1900;
result->tm_yday = days;
ip = __mon_yday[__isleap(y)];
/*
 * y is reused as a month index from here on: scan back from December
 * to the last month whose first day-of-year is not after days.
 */
for (y = 11; days < ip[y]; y--)
continue;
days -= ip[y];
result->tm_mon = y;
result->tm_mday = days + 1;
}
EXPORT_SYMBOL(time_to_tm);

1821
kernel/time/timekeeping.c Normal file

File diff suppressed because it is too large Load diff

20
kernel/time/timekeeping.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef _KERNEL_TIME_TIMEKEEPING_H
#define _KERNEL_TIME_TIMEKEEPING_H
/*
 * Internal interfaces for kernel/time/
 *
 * Declarations only: none of these functions is defined in this header.
 * NOTE(review): the definitions presumably live in timekeeping.c - confirm.
 */
/*
 * Both variants return a ktime_t and take three ktime_t pointers;
 * presumably the real/boot/TAI offsets are written through them - confirm
 * against the definitions.
 */
extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real,
ktime_t *offs_boot,
ktime_t *offs_tai);
extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real,
ktime_t *offs_boot,
ktime_t *offs_tai);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
extern int timekeeping_inject_offset(struct timespec *ts);
/* Accessors for the TAI offset, an s32. */
extern s32 timekeeping_get_tai_offset(void);
extern void timekeeping_set_tai_offset(s32 tai_offset);
extern void timekeeping_clocktai(struct timespec *ts);
#endif

View file

@ -0,0 +1,74 @@
/*
* debugfs file to track time spent in suspend
*
* Copyright (c) 2011, Google, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/time.h>
#include "timekeeping_internal.h"
/*
 * Histogram of accounted sleep times. tk_debug_account_sleep_time()
 * increments the slot selected by fls() of the seconds value;
 * tk_debug_show_sleep_time() prints the non-empty slots.
 */
static unsigned int sleep_time_bin[32] = {0};
/*
 * seq_file show callback: print the sleep-time histogram.
 *
 * @s: seq_file to print into
 * @data: unused
 *
 * Emits one line per non-empty bin, giving the bin's lower and upper bound
 * in seconds and its count. The bounds are built with unsigned shifts:
 * for bin 31 a signed "1 << 31" would shift into the sign bit of an int,
 * and the values are printed with %u in any case.
 *
 * Always returns 0.
 */
static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
{
	unsigned int bin;

	seq_puts(s, " time (secs) count\n");
	seq_puts(s, "------------------------------\n");
	for (bin = 0; bin < 32; bin++) {
		if (sleep_time_bin[bin] == 0)
			continue;
		seq_printf(s, "%10u - %-10u %4u\n",
			   bin ? 1U << (bin - 1) : 0, 1U << bin,
			   sleep_time_bin[bin]);
	}
	return 0;
}
/*
 * open() handler for the sleep_time file: binds tk_debug_show_sleep_time()
 * as the single_open() show callback, with no private data.
 */
static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
{
return single_open(file, tk_debug_show_sleep_time, NULL);
}
/*
 * File operations for the sleep_time file: read-only seq_file access.
 * single_release pairs with the single_open() call in the open handler.
 */
static const struct file_operations tk_debug_sleep_time_fops = {
.open = tk_debug_sleep_time_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Create the read-only (0444) "sleep_time" debugfs file with a NULL parent.
 * Registered as a late initcall.
 *
 * Returns 0 on success, or -ENOMEM (after logging an error) when
 * debugfs_create_file() returns NULL.
 */
static int __init tk_debug_sleep_time_init(void)
{
	struct dentry *entry = debugfs_create_file("sleep_time", 0444, NULL,
						   NULL,
						   &tk_debug_sleep_time_fops);

	if (entry)
		return 0;

	pr_err("Failed to create sleep_time debug file\n");
	return -ENOMEM;
}
late_initcall(tk_debug_sleep_time_init);
/*
 * Account one sleep interval in the histogram.
 *
 * @t: the sleep duration; only tv_sec is used
 *
 * The bin is fls() of the seconds value. fls() can return 32 (bit 31 set),
 * which is one past the end of the 32-slot table, so the index is capped
 * at the last slot rather than writing out of bounds.
 */
void tk_debug_account_sleep_time(struct timespec64 *t)
{
	const unsigned int last_bin =
		sizeof(sleep_time_bin) / sizeof(sleep_time_bin[0]) - 1;
	unsigned int bin = fls(t->tv_sec);

	if (bin > last_bin)
		bin = last_bin;

	sleep_time_bin[bin]++;
}

View file

@ -0,0 +1,29 @@
#ifndef _TIMEKEEPING_INTERNAL_H
#define _TIMEKEEPING_INTERNAL_H
/*
* timekeeping debug functions
*/
#include <linux/clocksource.h>
#include <linux/time.h>
/*
 * Sleep-time accounting hook. With CONFIG_DEBUG_FS the real function is
 * declared; without it the call expands to nothing, so the argument is
 * not evaluated.
 */
#ifdef CONFIG_DEBUG_FS
extern void tk_debug_account_sleep_time(struct timespec64 *t);
#else
#define tk_debug_account_sleep_time(x)
#endif
#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
/*
 * Masked difference between two cycle readings, with validation: when the
 * masked difference is zero or negative once viewed as a signed 64-bit
 * value, 0 is returned instead.
 */
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	cycle_t delta = (now - last) & mask;

	if ((s64)delta <= 0)
		return 0;

	return delta;
}
#else
/* Masked difference between two cycle readings, without validation. */
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	cycle_t delta = now - last;

	return delta & mask;
}
#endif
#endif /* _TIMEKEEPING_INTERNAL_H */

1738
kernel/time/timer.c Normal file

File diff suppressed because it is too large Load diff

370
kernel/time/timer_list.c Normal file
View file

@ -0,0 +1,370 @@
/*
* kernel/time/timer_list.c
*
* List pending timers
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/tick.h>
#include <asm/uaccess.h>
/*
 * Iteration state for the /proc/timer_list seq_file.
 */
struct timer_list_iter {
/* CPU being shown; -1 selects the header of the current pass */
int cpu;
/* false while printing per-CPU timers, true while printing tick devices */
bool second_pass;
/* timestamp in nanoseconds, sampled in timer_list_start() at offset 0 */
u64 now;
};
typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
/* Per-CPU hrtimer bases; declared here, defined outside this file. */
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
/*
 * This allows printing both to /proc/timer_list and
 * to the console (on SysRq-Q):
 *
 * A non-NULL m sends the output to seq_printf(); a NULL m sends it to
 * printk(). m is evaluated more than once.
 */
#define SEQ_printf(m, x...) \
do { \
if (m) \
seq_printf(m, x); \
else \
printk(x); \
} while (0)
/*
 * Print the symbol name for address @sym. When the lookup fails, the
 * address itself is printed instead, inside angle brackets, via %pK.
 */
static void print_name_offset(struct seq_file *m, void *sym)
{
	char symname[KSYM_NAME_LEN];
	int err = lookup_symbol_name((unsigned long)sym, symname);

	if (err >= 0) {
		SEQ_printf(m, "%s", symname);
		return;
	}

	SEQ_printf(m, "<%pK>", sym);
}
/*
 * Print one hrtimer.
 *
 * @m: seq_file to print into, or NULL for printk (see SEQ_printf)
 * @taddr: address of the live timer; only passed to print_name_offset()
 * @timer: the timer whose fields are printed (the caller in this file
 *         passes a snapshot copy)
 * @idx: position of the timer in its queue, printed as "#idx"
 * @now: current time in nanoseconds, used for the relative expiry values
 */
static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
#ifdef CONFIG_TIMER_STATS
/* one extra byte so the copied comm can be NUL-terminated */
char tmp[TASK_COMM_LEN + 1];
#endif
SEQ_printf(m, " #%d: ", idx);
print_name_offset(m, taddr);
SEQ_printf(m, ", ");
print_name_offset(m, timer->function);
SEQ_printf(m, ", S:%02lx", timer->state);
#ifdef CONFIG_TIMER_STATS
SEQ_printf(m, ", ");
print_name_offset(m, timer->start_site);
memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
tmp[TASK_COMM_LEN] = 0;
SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
#endif
SEQ_printf(m, "\n");
/* absolute soft/hard expiry, then both relative to now */
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
(unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
(long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
(long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
}
/*
 * Print every timer queued on @base.
 *
 * The base lock is dropped before each timer is printed, so the queue is
 * re-walked from its head for every timer: "next" counts how many timers
 * have already been printed and the walk skips that many nodes. The timer
 * is copied into a local snapshot while the lock is still held, and the
 * snapshot is what gets printed.
 */
static void
print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
u64 now)
{
struct hrtimer *timer, tmp;
unsigned long next = 0, i;
struct timerqueue_node *curr;
unsigned long flags;
next_one:
i = 0;
raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = timerqueue_getnext(&base->active);
/*
* Crude but we have to do this O(N*N) thing, because
* we have to unlock the base when printing:
*/
while (curr && i < next) {
curr = timerqueue_iterate_next(curr);
i++;
}
if (curr) {
timer = container_of(curr, struct hrtimer, node);
/* snapshot under the lock; printing happens after unlock */
tmp = *timer;
raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
print_timer(m, timer, &tmp, i, now);
next++;
goto next_one;
}
/* walked off the end of the queue: nothing left to print */
raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
}
/*
 * Print one hrtimer clock base: its address, index, resolution and
 * get_time callback (plus the offset when CONFIG_HIGH_RES_TIMERS is set),
 * followed by the list of its active timers.
 */
static void
print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
{
SEQ_printf(m, " .base: %pK\n", base);
SEQ_printf(m, " .index: %d\n",
base->index);
SEQ_printf(m, " .resolution: %Lu nsecs\n",
(unsigned long long)ktime_to_ns(base->resolution));
SEQ_printf(m, " .get_time: ");
print_name_offset(m, base->get_time);
SEQ_printf(m, "\n");
#ifdef CONFIG_HIGH_RES_TIMERS
SEQ_printf(m, " .offset: %Lu nsecs\n",
(unsigned long long) ktime_to_ns(base->offset));
#endif
SEQ_printf(m, "active timers:\n");
print_active_timers(m, base, now);
}
/*
 * Print the hrtimer state of one CPU: every clock base, then (with
 * CONFIG_HIGH_RES_TIMERS) the cpu_base counters, then (with
 * CONFIG_TICK_ONESHOT) the tick_sched fields and the current jiffies.
 */
static void print_cpu(struct seq_file *m, int cpu, u64 now)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
SEQ_printf(m, "cpu: %d\n", cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
SEQ_printf(m, " clock %d:\n", i);
print_base(m, cpu_base->clock_base + i, now);
}
/*
 * P(x) prints cpu_base->x as a plain number, P_ns(x) prints it as
 * nanoseconds; the member name doubles as the label via #x.
 */
#define P(x) \
SEQ_printf(m, " .%-15s: %Lu\n", #x, \
(unsigned long long)(cpu_base->x))
#define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(cpu_base->x)))
#ifdef CONFIG_HIGH_RES_TIMERS
P_ns(expires_next);
P(hres_active);
P(nr_events);
P(nr_retries);
P(nr_hangs);
P_ns(max_hang_time);
#endif
#undef P
#undef P_ns
#ifdef CONFIG_TICK_ONESHOT
/* Same helpers, redefined to read from the tick_sched "ts" below. */
# define P(x) \
SEQ_printf(m, " .%-15s: %Lu\n", #x, \
(unsigned long long)(ts->x))
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(ts->x)))
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(last_tick);
P(tick_stopped);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);
P_ns(idle_entrytime);
P_ns(idle_waketime);
P_ns(idle_exittime);
P_ns(idle_sleeptime);
P_ns(iowait_sleeptime);
P(last_jiffies);
P(next_jiffies);
P_ns(idle_expires);
SEQ_printf(m, "jiffies: %Lu\n",
(unsigned long long)jiffies);
}
#endif
/* Runs unconditionally, so the helpers never outlive this function. */
#undef P
#undef P_ns
SEQ_printf(m, "\n");
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
/*
 * Print one tick device and the clock event device attached to it.
 *
 * @m: seq_file to print into, or NULL for printk (see SEQ_printf)
 * @td: the tick device
 * @cpu: owning CPU; a negative value labels the output "Broadcast device"
 *
 * If no clock event device is attached, "<NULL>" is printed and the
 * function returns early.
 */
static void
print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
{
struct clock_event_device *dev = td->evtdev;
SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
SEQ_printf(m, "Broadcast device\n");
else
SEQ_printf(m, "Per CPU device: %d\n", cpu);
SEQ_printf(m, "Clock Event Device: ");
if (!dev) {
SEQ_printf(m, "<NULL>\n");
return;
}
SEQ_printf(m, "%s\n", dev->name);
SEQ_printf(m, " max_delta_ns: %llu\n",
(unsigned long long) dev->max_delta_ns);
SEQ_printf(m, " min_delta_ns: %llu\n",
(unsigned long long) dev->min_delta_ns);
SEQ_printf(m, " mult: %u\n", dev->mult);
SEQ_printf(m, " shift: %u\n", dev->shift);
SEQ_printf(m, " mode: %d\n", dev->mode);
/* NOTE(review): signed %Ld paired with an unsigned cast - confirm intent */
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
/* the three callbacks are printed as symbol names */
SEQ_printf(m, " set_next_event: ");
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
SEQ_printf(m, " retries: %lu\n", dev->retries);
SEQ_printf(m, "\n");
}
/*
 * Header of the tick-device pass. With CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 * it prints the broadcast device and the first word of the broadcast mask
 * (plus the first word of the oneshot mask under CONFIG_TICK_ONESHOT);
 * otherwise it prints nothing.
 */
static void timer_list_show_tickdevices_header(struct seq_file *m)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
/* only element [0] of the mask bits is shown */
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
cpumask_bits(tick_get_broadcast_mask())[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
#endif
SEQ_printf(m, "\n");
#endif
}
#endif
/*
 * Print the banner of the listing: format version, the number of hrtimer
 * clock bases, and the timestamp @now in nanoseconds.
 */
static inline void timer_list_header(struct seq_file *m, u64 now)
{
SEQ_printf(m, "Timer List Version: v0.7\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
/* NOTE(review): signed %Ld paired with an unsigned cast - confirm intent */
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
}
/*
 * seq_file show callback: print the element the iterator @v points at.
 *
 *   first pass,  cpu == -1 : listing header
 *   first pass,  cpu >= 0  : hrtimer state of that CPU
 *   second pass, cpu == -1 : tick-device header
 *   second pass, cpu >= 0  : tick device of that CPU
 *
 * The second-pass branches exist only with CONFIG_GENERIC_CLOCKEVENTS.
 * Always returns 0.
 */
static int timer_list_show(struct seq_file *m, void *v)
{
struct timer_list_iter *iter = v;
if (iter->cpu == -1 && !iter->second_pass)
timer_list_header(m, iter->now);
else if (!iter->second_pass)
print_cpu(m, iter->cpu, iter->now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
else if (iter->cpu == -1 && iter->second_pass)
timer_list_show_tickdevices_header(m);
else
print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
#endif
return 0;
}
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
int cpu;
timer_list_header(NULL, now);
for_each_online_cpu(cpu)
print_cpu(NULL, cpu, now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
timer_list_show_tickdevices_header(NULL);
for_each_online_cpu(cpu)
print_tickdevice(NULL, tick_get_device(cpu), cpu);
#endif
return;
}
/*
 * Advance @iter by @offset positions.
 *
 * Each step moves to the next online CPU. When the CPUs are exhausted:
 * with CONFIG_GENERIC_CLOCKEVENTS the first pass rolls over into the
 * second pass (cpu reset to -1, which selects that pass's header) and the
 * second pass ends the iteration; without it the iteration ends at once.
 *
 * Returns @iter, or NULL when the iteration is finished.
 */
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
if (iter->cpu >= nr_cpu_ids) {
#ifdef CONFIG_GENERIC_CLOCKEVENTS
if (!iter->second_pass) {
iter->cpu = -1;
iter->second_pass = true;
} else
return NULL;
#else
return NULL;
#endif
}
}
return iter;
}
/*
 * seq_file start callback.
 *
 * Rewinds the iterator to the first-pass header position and then walks
 * forward *offset steps. The timestamp is sampled only when starting at
 * offset 0; at any other offset the value already stored in the iterator
 * is left untouched.
 *
 * Returns the iterator, or NULL when *offset is past the end.
 */
static void *timer_list_start(struct seq_file *file, loff_t *offset)
{
	struct timer_list_iter *it = file->private;
	loff_t pos = *offset;

	it->second_pass = false;
	it->cpu = -1;
	if (pos == 0)
		it->now = ktime_to_ns(ktime_get());

	return move_iter(it, pos);
}
/*
 * seq_file next callback: bump the position and advance the iterator by
 * one element. @v is not used; the iterator is taken from file->private.
 *
 * Returns the iterator, or NULL at the end of the listing.
 */
static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
{
	struct timer_list_iter *it = file->private;

	*offset += 1;

	return move_iter(it, 1);
}
/* seq_file stop callback: intentionally empty. */
static void timer_list_stop(struct seq_file *seq, void *v)
{
}
/* seq_file iterator callbacks for /proc/timer_list. */
static const struct seq_operations timer_list_sops = {
.start = timer_list_start,
.next = timer_list_next,
.stop = timer_list_stop,
.show = timer_list_show,
};
/*
 * open() handler: opens the seq_file and requests private storage the size
 * of struct timer_list_iter, which the iterator callbacks read back from
 * file->private.
 */
static int timer_list_open(struct inode *inode, struct file *filp)
{
return seq_open_private(filp, &timer_list_sops,
sizeof(struct timer_list_iter));
}
/*
 * File operations for /proc/timer_list. seq_release_private pairs with the
 * seq_open_private() call in timer_list_open().
 */
static const struct file_operations timer_list_fops = {
.open = timer_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
/*
 * Register the read-only (0444) "timer_list" proc entry at the proc root.
 *
 * Returns 0 on success, -ENOMEM when proc_create() fails.
 */
static int __init init_timer_list_procfs(void)
{
	if (!proc_create("timer_list", 0444, NULL, &timer_list_fops))
		return -ENOMEM;

	return 0;
}
__initcall(init_timer_list_procfs);

425
kernel/time/timer_stats.c Normal file
View file

@ -0,0 +1,425 @@
/*
* kernel/time/timer_stats.c
*
* Collect timer usage statistics.
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* timer_stats is based on timer_top, a similar functionality which was part of
* Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
* Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
* on dynamic allocation of the statistics entries and linear search based
* lookup combined with a global lock, rather than the static array, hash
* and per-CPU locking which is used by timer_stats. It was written for the
* pre hrtimer kernel code and therefore did not take hrtimers into account.
* Nevertheless it provided the base for the timer_stats implementation and
* was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
* for this effort.
*
* timer_top.c is
* Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
* Written by Daniel Petrini <d.pensator@gmail.com>
* timer_top.c was released under the GNU General Public License version 2
*
* We export the addresses and counting of timer functions being called,
* the pid and cmdline from the owner process if applicable.
*
* Start/stop data collection:
* # echo [1|0] >/proc/timer_stats
*
* Display the information collected so far:
* # cat /proc/timer_stats
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <asm/uaccess.h>
/*
 * This is our basic unit of interest: a timer expiry event identified
 * by the timer, its start/expire functions and the PID of the task that
 * started the timer. We count the number of times an event happens:
 */
struct entry {
/*
 * Hash list: next entry in the same hash bucket, NULL at the tail.
 */
struct entry *next;
/*
 * Hash keys: exactly the four fields compared by match_entries() and
 * mixed by __tstat_hashfn().
 */
void *timer;
void *start_func;
void *expire_func;
pid_t pid;
/*
 * Number of timeout events:
 */
unsigned long count;
/* Flags supplied by the caller; not part of the hash key. */
unsigned int timer_flag;
/*
 * We save the command-line string to preserve
 * this information past task exit:
 * (one byte longer than TASK_COMM_LEN)
 */
char comm[TASK_COMM_LEN + 1];
} ____cacheline_aligned_in_smp;
/*
 * Spinlock protecting the tables - not taken during lookup:
 * tstat_lookup() takes it only on its slow path, when a new entry may
 * have to be allocated and linked.
 */
static DEFINE_RAW_SPINLOCK(table_lock);
/*
 * Per-CPU lookup locks for fast hash lookup:
 * held by timer_stats_update_stats() around the lookup, and cycled by
 * sync_access() to wait for in-flight updates.
 */
static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
/*
 * Mutex to serialize state changes with show-stats activities:
 */
static DEFINE_MUTEX(show_mutex);
/*
 * Collection status, active/inactive:
 * Not static. Toggled by tstats_write() and checked by
 * timer_stats_update_stats() both before and after taking its lock.
 */
int __read_mostly timer_stats_active;
/*
 * Beginning/end timestamps of measurement:
 */
static ktime_t time_start, time_stop;
/*
 * tstat entry structs only get allocated while collection is
 * active and never freed during that time - this simplifies
 * things quite a bit.
 *
 * The whole table is cleared by reset_entries() when a new collection
 * period is started.
 */
#define MAX_ENTRIES_BITS 10
#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
/* Number of slots of entries[] handed out so far by alloc_entry(). */
static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];
/* Incremented each time an event cannot be recorded because the table is full. */
static atomic_t overflow_count;
/*
 * The entries are in a hash-table, for fast lookup:
 * the table has half as many buckets as there are entries.
 */
#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
/* Bucket index: XOR of the four key fields, masked to the table size. */
#define __tstat_hashfn(entry) \
(((unsigned long)(entry)->timer ^ \
(unsigned long)(entry)->start_func ^ \
(unsigned long)(entry)->expire_func ^ \
(unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
/* Address of the bucket head pointer for the given entry. */
#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
/*
 * Discard all collected statistics: zero the overflow counter, empty every
 * hash bucket, clear the entry storage and rewind the allocation cursor.
 */
static void reset_entries(void)
{
	atomic_set(&overflow_count, 0);
	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
	memset(entries, 0, sizeof(entries));
	nr_entries = 0;
}
/*
 * Hand out the next unused slot of the static entries[] table.
 *
 * Returns a pointer to the slot, or NULL once all MAX_ENTRIES slots have
 * been handed out.
 */
static struct entry *alloc_entry(void)
{
	struct entry *slot;

	if (nr_entries >= MAX_ENTRIES)
		return NULL;

	slot = &entries[nr_entries];
	nr_entries++;

	return slot;
}
/*
 * Compare the key fields of two entries (timer, start function, expire
 * function, pid).
 *
 * Returns 1 when all four are equal, 0 otherwise.
 */
static int match_entries(struct entry *entry1, struct entry *entry2)
{
	if (entry1->timer != entry2->timer)
		return 0;
	if (entry1->start_func != entry2->start_func)
		return 0;
	if (entry1->expire_func != entry2->expire_func)
		return 0;

	return entry1->pid == entry2->pid;
}
/*
 * Look up whether an entry matching this item is present
 * in the hash already. Must be called with irqs off and the
 * lookup lock held.
 *
 * @entry: template carrying the key fields (and timer_flag) to look up
 * @comm: task name stored in a newly created entry; TASK_COMM_LEN bytes
 *        are copied from it
 *
 * Returns the matching entry, a newly linked entry, or NULL when the
 * entry table is full.
 */
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
	struct entry **head, *curr, *prev;

	head = tstat_hashentry(entry);
	curr = *head;

	/*
	 * The fastpath is when the entry is already hashed,
	 * we do this with the lookup lock held, but with the
	 * table lock not held:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			return curr;

		curr = curr->next;
	}
	/*
	 * Slowpath: allocate, set up and link a new hash entry:
	 */
	prev = NULL;
	curr = *head;

	raw_spin_lock(&table_lock);
	/*
	 * Make sure we have not raced with another CPU:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			goto out_unlock;

		prev = curr;
		curr = curr->next;
	}

	curr = alloc_entry();
	if (curr) {
		*curr = *entry;
		curr->count = 0;
		curr->next = NULL;
		memcpy(curr->comm, comm, TASK_COMM_LEN);
		/*
		 * The struct copy above also copied the template's comm[],
		 * which timer_stats_update_stats() leaves uninitialised on
		 * its stack. memcpy() rewrites only the first TASK_COMM_LEN
		 * bytes, so terminate the spare byte explicitly: the name is
		 * later printed with %s.
		 */
		curr->comm[TASK_COMM_LEN] = '\0';

		smp_mb(); /* Ensure that curr is initialized before insert */

		if (prev)
			prev->next = curr;
		else
			*head = curr;
	}
 out_unlock:
	raw_spin_unlock(&table_lock);

	return curr;
}
/**
 * timer_stats_update_stats - Update the statistics for a timer.
 * @timer: pointer to either a timer_list or a hrtimer
 * @pid: the pid of the task which set up the timer
 * @startf: pointer to the function which did the timer setup
 * @timerf: pointer to the timer callback function of the timer
 * @comm: name of the process which set up the timer
 * @timer_flag: flags stored with a newly created entry; tstats_show() tests
 *              TIMER_STATS_FLAG_DEFERRABLE in it
 *
 * When the timer is already registered, then the event counter is
 * incremented. Otherwise the timer is registered in a free slot. If no
 * slot is free, the overflow counter is incremented instead.
 *
 * Does nothing while collection is inactive.
 */
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
void *timerf, char *comm,
unsigned int timer_flag)
{
/*
* It doesn't matter which lock we take:
*/
raw_spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
/* unlocked early-out for the common case of collection being off */
if (likely(!timer_stats_active))
return;
lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
/* only the key fields and timer_flag of the template are filled in */
input.timer = timer;
input.start_func = startf;
input.expire_func = timerf;
input.pid = pid;
input.timer_flag = timer_flag;
raw_spin_lock_irqsave(lock, flags);
/* re-check under the lock: collection may have been stopped meanwhile */
if (!timer_stats_active)
goto out_unlock;
entry = tstat_lookup(&input, comm);
if (likely(entry))
entry->count++;
else
atomic_inc(&overflow_count);
out_unlock:
raw_spin_unlock_irqrestore(lock, flags);
}
/*
 * Print the symbol name for @addr. When the lookup fails, the address is
 * printed inside angle brackets instead.
 *
 * The fallback uses %pK rather than %p: this output is reachable through a
 * proc file created with mode 0644, so raw kernel addresses should follow
 * the kptr_restrict policy, as the equivalent helper in timer_list.c
 * already does.
 */
static void print_name_offset(struct seq_file *m, unsigned long addr)
{
	char symname[KSYM_NAME_LEN];

	if (lookup_symbol_name(addr, symname) < 0)
		seq_printf(m, "<%pK>", (void *)addr);
	else
		seq_printf(m, "%s", symname);
}
/*
 * seq_file show callback for /proc/timer_stats.
 *
 * Prints the version banner, the sample period, the overflow count (only
 * when non-zero), the collection state, one line per recorded entry and a
 * closing total. Runs under show_mutex. Always returns 0.
 */
static int tstats_show(struct seq_file *m, void *v)
{
struct timespec period;
struct entry *entry;
unsigned long ms;
long events = 0;
ktime_t time;
int i;
mutex_lock(&show_mutex);
/*
* If still active then calculate up to now:
*/
if (timer_stats_active)
time_stop = ktime_get();
time = ktime_sub(time_stop, time_start);
period = ktime_to_timespec(time);
/* sub-second part of the period, in milliseconds */
ms = period.tv_nsec / 1000000;
seq_puts(m, "Timer Stats Version: v0.3\n");
seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
if (atomic_read(&overflow_count))
seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");
for (i = 0; i < nr_entries; i++) {
entry = entries + i;
/* deferrable timers get a "D" suffix on the count */
if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
seq_printf(m, "%4luD, %5d %-16s ",
entry->count, entry->pid, entry->comm);
} else {
seq_printf(m, " %4lu, %5d %-16s ",
entry->count, entry->pid, entry->comm);
}
print_name_offset(m, (unsigned long)entry->start_func);
seq_puts(m, " (");
print_name_offset(m, (unsigned long)entry->expire_func);
seq_puts(m, ")\n");
events += entry->count;
}
/* from here on ms is the whole period; forced non-zero as it is a divisor */
ms += period.tv_sec * 1000;
if (!ms)
ms = 1;
/* a rate is printed only for periods of at least one full second */
if (events && period.tv_sec)
seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
events, events * 1000 / ms,
(events * 1000000 / ms) % 1000);
else
seq_printf(m, "%ld total events\n", events);
mutex_unlock(&show_mutex);
return 0;
}
/*
 * Barrier against in-flight lookups/updates after a state change.
 *
 * For every online CPU the per-CPU lookup lock is taken and released
 * straight away. Nothing is done while it is held: acquiring it is enough
 * to know that whoever held it before has finished.
 */
static void sync_access(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		raw_spinlock_t *lookup_lock = &per_cpu(tstats_lookup_lock, cpu);
		unsigned long flags;

		raw_spin_lock_irqsave(lookup_lock, flags);
		raw_spin_unlock_irqrestore(lookup_lock, flags);
	}
}
/*
 * write() handler for /proc/timer_stats: start or stop collection.
 *
 * Exactly two bytes must be written at file offset 0. Only the first byte
 * is interpreted:
 *   '0' - stop collection (if active): clear the flag, record the stop
 *         time and wait for in-flight updates via sync_access()
 *   '1' - start collection (if inactive): reset all statistics, record the
 *         start time, then set the flag after a memory barrier
 *
 * Returns @count on success, -EINVAL for a wrong length, a non-zero offset
 * or an unknown command byte, -EFAULT when the user buffer cannot be read.
 */
static ssize_t tstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	ssize_t ret = count;
	char ctl[2];

	if (count != 2 || *offs)
		return -EINVAL;

	if (copy_from_user(ctl, buf, count))
		return -EFAULT;

	mutex_lock(&show_mutex);
	if (ctl[0] == '0') {
		if (timer_stats_active) {
			timer_stats_active = 0;
			time_stop = ktime_get();
			sync_access();
		}
	} else if (ctl[0] == '1') {
		if (!timer_stats_active) {
			reset_entries();
			time_start = ktime_get();
			smp_mb();
			timer_stats_active = 1;
		}
	} else {
		ret = -EINVAL;
	}
	mutex_unlock(&show_mutex);

	return ret;
}
/*
 * open() handler for /proc/timer_stats: binds tstats_show() as the
 * single_open() show callback, with no private data.
 */
static int tstats_open(struct inode *inode, struct file *filp)
{
return single_open(filp, tstats_show, NULL);
}
/*
 * File operations for /proc/timer_stats: seq_file based reads, plus a
 * write handler that starts and stops collection.
 */
static const struct file_operations tstats_fops = {
.open = tstats_open,
.read = seq_read,
.write = tstats_write,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Initialise the per-CPU lookup lock of every possible CPU (not only the
 * online ones).
 */
void __init init_timer_stats(void)
{
int cpu;
for_each_possible_cpu(cpu)
raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}
/*
 * Register the "timer_stats" proc entry (mode 0644) at the proc root.
 *
 * Returns 0 on success, -ENOMEM when proc_create() fails.
 */
static int __init init_tstats_procfs(void)
{
	if (!proc_create("timer_stats", 0644, NULL, &tstats_fops))
		return -ENOMEM;

	return 0;
}
__initcall(init_tstats_procfs);

168
kernel/time/udelay_test.c Normal file
View file

@ -0,0 +1,168 @@
/*
* udelay() test kernel module
*
* Test is executed by writing and reading to /sys/kernel/debug/udelay_test
* Tests are configured by writing: USECS ITERATIONS
* Tests are executed by reading from the same file.
* Specifying usecs of 0 or negative values will run multiple tests.
*
* Copyright (C) 2014 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/uaccess.h>
/* Iteration count used when a write supplies only USECS. */
#define DEFAULT_ITERATIONS 100
#define DEBUGFS_FILENAME "udelay_test"
/* Guards the two configuration values below and the debugfs dentry. */
static DEFINE_MUTEX(udelay_test_lock);
static struct dentry *udelay_test_debugfs_file;
/* Test configuration, set by udelay_test_write() and read by udelay_test_show(). */
static int udelay_test_usecs;
static int udelay_test_iterations = DEFAULT_ITERATIONS;
/*
 * Run udelay(@usecs) @iters times and report the timing.
 *
 * @s: seq_file receiving the one-line report
 * @usecs: delay to test, in microseconds
 * @iters: number of repetitions; must be non-zero, as it is the divisor
 *         of the average
 *
 * Each delay is timed with ktime_get_ts(). The report shows the expected
 * and the minimum allowed duration plus the observed min/avg/max, all in
 * nanoseconds. An iteration counts as a failure when it finished more
 * than 0.5% early. The unsigned values (iters, avg) are printed with
 * unsigned conversions, and the loop index has the same type as @iters.
 *
 * Always returns 0.
 */
static int udelay_test_single(struct seq_file *s, int usecs, uint32_t iters)
{
	int min = 0, max = 0, fail_count = 0;
	uint64_t sum = 0;
	uint64_t avg;
	uint32_t i;
	/* Allow udelay to be up to 0.5% fast */
	int allowed_error_ns = usecs * 5;

	for (i = 0; i < iters; ++i) {
		struct timespec ts1, ts2;
		int time_passed;

		ktime_get_ts(&ts1);
		udelay(usecs);
		ktime_get_ts(&ts2);
		time_passed = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);

		if (i == 0 || time_passed < min)
			min = time_passed;
		if (i == 0 || time_passed > max)
			max = time_passed;
		if ((time_passed + allowed_error_ns) / 1000 < usecs)
			++fail_count;
		WARN_ON(time_passed < 0);
		sum += time_passed;
	}

	avg = sum;
	do_div(avg, iters);
	seq_printf(s, "%d usecs x %u: exp=%d allowed=%d min=%d avg=%llu max=%d",
		   usecs, iters, usecs * 1000,
		   (usecs * 1000) - allowed_error_ns, min,
		   (unsigned long long)avg, max);
	if (fail_count)
		seq_printf(s, " FAIL=%d", fail_count);
	seq_puts(s, "\n");

	return 0;
}
/*
 * seq_file show callback: run the configured test or print the usage text.
 *
 * The configuration is copied out under udelay_test_lock, so the test
 * itself runs without the lock held.
 *   usecs > 0 and iters > 0 : run udelay_test_single() and return its result
 *   usecs == 0              : print loops_per_jiffy, the current time and usage
 *   anything else           : print nothing
 *
 * Returns 0 in the last two cases.
 */
static int udelay_test_show(struct seq_file *s, void *v)
{
	struct timespec ts;
	int usecs, iters;

	mutex_lock(&udelay_test_lock);
	iters = udelay_test_iterations;
	usecs = udelay_test_usecs;
	mutex_unlock(&udelay_test_lock);

	if (usecs > 0 && iters > 0)
		return udelay_test_single(s, usecs, iters);

	if (usecs != 0)
		return 0;

	ktime_get_ts(&ts);
	seq_printf(s, "udelay() test (lpj=%ld kt=%ld.%09ld)\n",
		   loops_per_jiffy, ts.tv_sec, ts.tv_nsec);
	seq_puts(s, "usage:\n");
	seq_puts(s, "echo USECS [ITERS] > " DEBUGFS_FILENAME "\n");
	seq_puts(s, "cat " DEBUGFS_FILENAME "\n");

	return 0;
}
/*
 * open() handler: binds udelay_test_show() as the single_open() show
 * callback and passes the inode's i_private through as its data.
 */
static int udelay_test_open(struct inode *inode, struct file *file)
{
return single_open(file, udelay_test_show, inode->i_private);
}
/*
 * write() handler: parse "USECS [ITERS]" and store it as the new test
 * configuration.
 *
 * The input is copied into a 32-byte buffer and NUL-terminated before it
 * is parsed. If ITERS is missing, DEFAULT_ITERATIONS is used.
 *
 * Returns @count on success, -EINVAL when the input does not fit the
 * buffer or contains no number, -EFAULT when the user buffer cannot be
 * read.
 */
static ssize_t udelay_test_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *pos)
{
	char lbuf[32];
	int usecs, iters;
	int parsed;

	if (count >= sizeof(lbuf))
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;
	lbuf[count] = '\0';

	parsed = sscanf(lbuf, "%d %d", &usecs, &iters);
	if (parsed < 1)
		return -EINVAL;
	if (parsed == 1)
		iters = DEFAULT_ITERATIONS;

	mutex_lock(&udelay_test_lock);
	udelay_test_usecs = usecs;
	udelay_test_iterations = iters;
	mutex_unlock(&udelay_test_lock);

	return count;
}
/*
 * File operations for the udelay_test debugfs file: writes configure a
 * test, reads run it through the seq_file show callback.
 */
static const struct file_operations udelay_test_debugfs_ops = {
.owner = THIS_MODULE,
.open = udelay_test_open,
.read = seq_read,
.write = udelay_test_write,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Module init: create the debugfs file with a NULL parent and mode
 * S_IRUSR. The result of debugfs_create_file() is stored but not checked;
 * this function always returns 0.
 * NOTE(review): the mode has no write bit although a write handler is
 * installed - confirm this is intended.
 */
static int __init udelay_test_init(void)
{
mutex_lock(&udelay_test_lock);
udelay_test_debugfs_file = debugfs_create_file(DEBUGFS_FILENAME,
S_IRUSR, NULL, NULL, &udelay_test_debugfs_ops);
mutex_unlock(&udelay_test_lock);
return 0;
}
module_init(udelay_test_init);
/* Module exit: remove the debugfs file created by udelay_test_init(). */
static void __exit udelay_test_exit(void)
{
mutex_lock(&udelay_test_lock);
debugfs_remove(udelay_test_debugfs_file);
mutex_unlock(&udelay_test_lock);
}
module_exit(udelay_test_exit);
MODULE_AUTHOR("David Riley <davidriley@chromium.org>");
MODULE_LICENSE("GPL");