Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file

@ -0,0 +1,219 @@
#!/bin/bash
#
# NAME
# failcmd.sh - run a command while injecting slab/page allocation failures
#
# SYNOPSIS
# failcmd.sh --help
# failcmd.sh [<options>] command [arguments]
#
# DESCRIPTION
# Run command while injecting slab/page allocation failures by fault
# injection.
#
# NOTE: you need to run this script as root.
#
# Print the help text to stderr. The caller decides the exit status.
# Note that "$0" inside the here-document is expanded, so the text shows
# the name the script was invoked with.
usage()
{
cat >&2 <<EOF
Usage: $0 [options] command [arguments]
OPTIONS
-p percent
--probability=percent
likelihood of failure injection, in percent.
Default value is 1
-t value
--times=value
specifies how many times failures may happen at most.
Default value is 1
--oom-kill-allocating-task=value
set /proc/sys/vm/oom_kill_allocating_task to specified value
before running the command.
Default value is 1
-h, --help
Display a usage message and exit
--interval=value, --space=value, --verbose=value, --task-filter=value,
--stacktrace-depth=value, --require-start=value, --require-end=value,
--reject-start=value, --reject-end=value, --ignore-gfp-wait=value
See Documentation/fault-injection/fault-injection.txt for more
information
failslab options:
--cache-filter=value
fail_page_alloc options:
--ignore-gfp-highmem=value, --min-order=value
ENVIRONMENT
FAILCMD_TYPE
The following values for FAILCMD_TYPE are recognized:
failslab
inject slab allocation failures
fail_page_alloc
inject page allocation failures
If FAILCMD_TYPE is not defined, then failslab is used.
EOF
}
# The fault-injection knobs live in debugfs and /proc/sys, which only root
# may write to.
if [ "$UID" != 0 ]; then
	echo must be run as root >&2
	exit 1
fi

# Use the first debugfs mount point reported by mount(8).
DEBUGFS=$(mount -t debugfs | head -1 | awk '{ print $3}')
if [ ! -d "$DEBUGFS" ]; then
	echo debugfs is not mounted >&2
	exit 1
fi

# Select the fault-injection capability; failslab is the default.
FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
FAULTATTR=$DEBUGFS/$FAILCMD_TYPE
# Quoted on purpose: an unquoted path containing whitespace makes "[" fail
# with a usage error, which would silently skip this availability check.
if [ ! -d "$FAULTATTR" ]; then
	echo "$FAILCMD_TYPE is not available" >&2
	exit 1
fi

# Long options common to every fault-injection capability ...
LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter:
LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end:
LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help

# ... plus the ones that only exist for the selected capability.
if [ "$FAILCMD_TYPE" = failslab ]; then
	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter:
elif [ "$FAILCMD_TYPE" = fail_page_alloc ]; then
	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order:
fi

# Let getopt(1) validate and normalize the command line; on a parse error
# it has already printed a diagnostic, so just show the usage and bail out.
if ! TEMP=$(getopt -o p:i:t:s:v:h --long "$LONGOPTS" -n 'failcmd.sh' -- "$@"); then
	usage
	exit 1
fi

# Replace the positional parameters with getopt's normalized list.
eval set -- "$TEMP"
# Reset the attributes this script always touches to a state in which no
# failures are injected: task filtering off, 0% probability, times = 1.
# The redirect targets are quoted so that a debugfs path containing
# whitespace does not turn into an "ambiguous redirect" error.
fault_attr_default()
{
	echo N > "$FAULTATTR/task-filter"
	echo 0 > "$FAULTATTR/probability"
	echo 1 > "$FAULTATTR/times"
}
# Begin from the non-injecting defaults before any option is applied.
fault_attr_default

# Snapshot the current OOM-killer policy so that it can be put back later.
oom_kill_allocating_task_saved=$(cat /proc/sys/vm/oom_kill_allocating_task)

# Undo what this script changed: reset the fault attributes to their
# defaults and write the saved oom_kill_allocating_task value back.
restore_values()
{
	fault_attr_default
	echo $oom_kill_allocating_task_saved > /proc/sys/vm/oom_kill_allocating_task
}
#
# Default options
#
declare -i oom_kill_allocating_task=1
declare task_filter=Y
declare -i probability=1
declare -i times=1

# Consume the option list produced by getopt. probability, times,
# task-filter and oom-kill-allocating-task are only recorded here and
# written out later, right before the command is run; every other option
# is written to its debugfs attribute immediately.
while true; do
	case "$1" in
	-p|--probability)
		probability=$2
		shift 2
		;;
	-t|--times)
		times=$2
		shift 2
		;;
	--task-filter)
		task_filter=$2
		shift 2
		;;
	--oom-kill-allocating-task)
		oom_kill_allocating_task=$2
		shift 2
		;;
	-i|--interval)
		echo "$2" > "$FAULTATTR/interval"
		shift 2
		;;
	-s|--space)
		echo "$2" > "$FAULTATTR/space"
		shift 2
		;;
	-v|--verbose)
		echo "$2" > "$FAULTATTR/verbose"
		shift 2
		;;
	--stacktrace-depth|--require-start|--require-end|--reject-start|\
	--reject-end|--ignore-gfp-wait|--cache-filter|--ignore-gfp-highmem|\
	--min-order)
		# Each of these long options is written to the attribute of
		# the same name (option name minus the leading "--"). This
		# also makes --cache-filter write to "cache-filter" rather
		# than the misspelled "cache_filter".
		echo "$2" > "$FAULTATTR/${1#--}"
		shift 2
		;;
	-h|--help)
		usage
		exit 0
		;;
	--)
		# End of options; what remains is the command to run.
		shift
		break
		;;
	*)
		# Should not happen after getopt validation, but without
		# this arm an unmatched word would spin the loop forever.
		echo "failcmd.sh: unexpected option: $1" >&2
		exit 1
		;;
	esac
done
# No command given: the options (if any) have been applied, nothing to run.
[ -z "$1" ] && exit 0

# Apply the settings that were only recorded during option parsing.
echo "$oom_kill_allocating_task" > /proc/sys/vm/oom_kill_allocating_task
echo "$task_filter" > "$FAULTATTR/task-filter"
echo "$probability" > "$FAULTATTR/probability"
echo "$times" > "$FAULTATTR/times"

# Put everything back when the script exits or is interrupted.
trap "restore_values" SIGINT SIGTERM EXIT

# Mark the child shell as a fault-injection target, then replace it with
# the requested command. The command is handed over as separate positional
# parameters ("$@") instead of being pasted into the script text, so
# arguments containing whitespace or shell metacharacters reach the command
# unchanged rather than being re-split and re-interpreted by the shell.
bash -c 'echo 1 > /proc/self/make-it-fail && exec "$@"' failcmd.sh "$@"

View file

@ -0,0 +1,32 @@
#!/usr/bin/perl
# Cross-check the option names used in ktest.pl against the option names
# that appear in sample.conf (both read from the current directory).
#
# Output, one line per mismatch, sorted by name:
#   "opt = NAME"   NAME is used in ktest.pl but does not appear in sample.conf
#   "samp = NAME"  NAME appears in sample.conf but is not used in ktest.pl
#
# Dies with a message if either file cannot be opened, instead of silently
# treating the unreadable file as empty and reporting bogus mismatches.
use strict;
use warnings;

my (%opt, %samp);

open(my $ktest, '<', "ktest.pl") or die "Can't open ktest.pl: $!\n";
while (<$ktest>) {
	# hashes are now used
	# Three ways an option name shows up in ktest.pl: a $opt{NAME} lookup
	# (optionally with a [n] suffix), a NAME => entry at the start of a
	# line, or a set_test_option("NAME" call.
	if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
	    /^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
	    /set_test_option\("(.*?)"/) {
		$opt{$1} = 1;
	}
}
close($ktest);

open(my $sample, '<', "sample.conf") or die "Can't open sample.conf: $!\n";
while (<$sample>) {
	# An option assignment, possibly commented out: "NAME =" or "#NAME =".
	if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
		$samp{$1} = 1;
	}
}
close($sample);

# Report in sorted order so that the output is stable between runs.
foreach my $name (sort keys %opt) {
	print "opt = $name\n" unless exists $samp{$name};
}

foreach my $name (sort keys %samp) {
	print "samp = $name\n" unless exists $opt{$name};
}

View file

@ -0,0 +1,32 @@
This directory contains example configs to use ktest for various tasks.
The configs still need to be customized for your environment, but they
are broken up by task, which makes it easier to understand how to set up
ktest.
The configs are based off of real working configs but have been modified
and commented to show more generic use cases that are more helpful for
developers.
crosstests.conf - this config shows an example of testing a git repo against
lots of different architectures. It only does build tests, but makes
it easy to compile test different archs. You can download the arch
cross compilers from:
http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
test.conf - A generic example of a config. This is based on an actual config
used to perform real testing.
kvm.conf - An example of a config that is used to test a virtual guest running
on a host.
snowball.conf - An example config that was used to demo ktest.pl against
a snowball ARM board.
include/ - The include directory holds default configs that can be
included into other configs. This is a real use example that shows how
to reuse configs for various machines or set ups. The files here
are included by other config files, where the other config files define
options and variables that will make the included config work for the
given environment.

View file

@ -0,0 +1,254 @@
#
# Example config for cross compiling
#
# In this config, it is expected that the tool chains from:
#
# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
#
# running on a x86_64 system have been downloaded and installed into:
#
# /usr/local/
#
# such that the compiler binaries are something like:
#
# /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc
#
# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2
# this config uses variables to differentiate them.
#
# Comments describe some of the options, but full descriptions of
# options are described in the sample.conf file.
# ${PWD} is defined by ktest.pl to be the directory that the user
# was in when they executed ktest.pl. It may be better to hardcode the
# path name here. THIS_DIR is the variable used through out the config file
# in case you want to change it.
THIS_DIR := ${PWD}
# Update the BUILD_DIR option to the location of your git repo you want to test.
BUILD_DIR = ${THIS_DIR}/linux.git
# The build will go into this directory. It will be created when you run the test.
OUTPUT_DIR = ${THIS_DIR}/cross-compile
# The build will be compiled with -j8
BUILD_OPTIONS = -j8
# The test will not stop when it hits a failure.
DIE_ON_FAILURE = 0
# If you want to have ktest.pl store the failure somewhere, uncomment this option
# and change the directory where ktest should store the failures.
#STORE_FAILURES = ${THIS_DIR}/failures
# The log file is stored in the OUTPUT_DIR called cross.log
# If you enable this, you need to create the OUTPUT_DIR. It won't be created for you.
LOG_FILE = ${OUTPUT_DIR}/cross.log
# The log file will be cleared each time you run ktest.
CLEAR_LOG = 1
# As some archs do not build with the defconfig, they have been marked
# to be ignored. If you want to test them anyway, change DO_FAILED to 1.
# If a test that has been marked as DO_FAILED passes, then you should change
# that test to be DO_DEFAULT
DO_FAILED := 0
DO_DEFAULT := 1
# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single
# arch that you want to test. (uncomment RUN and choose your arch)
#RUN := m32r
# At the bottom of the config file exists a bisect test. You can update that
# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable
# to run the bisect on the arch.
#RUN := bisect
# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1
# and they select that in the test.
# Note: GCC_VER is declared as an option and not a variable ('=' instead of ':=')
# This is important. A variable is used only in the config file and if it is set
# it stays that way for the rest of the config file until it is changed again.
# Here we want GCC_VER to remain persistent and change for each test, as it is used in
# the MAKE_CMD. By using '=' instead of ':=' we achieve our goal.
GCC_VER = 4.5.2
MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH}
# all tests are only doing builds.
TEST_TYPE = build
# If you want to add configs on top of the defconfig, you can add those configs into
# the add-config file and uncomment this option. This is useful if you want to test
# all cross compiles with PREEMPT set, or TRACING on, etc.
#ADD_CONFIG = ${THIS_DIR}/add-config
# All tests are using defconfig
BUILD_TYPE = defconfig
# The test names will have the arch and cross compiler used. This will be shown in
# the results.
TEST_NAME = ${ARCH} ${CROSS}
# alpha
TEST_START IF ${RUN} == alpha || ${DO_DEFAULT}
# Notice that CROSS and ARCH are also options and not variables (again '=' instead
# of ':='). This is because TEST_NAME and MAKE_CMD will use them for each test.
# Only options are available during runs. Variables are only present in parsing the
# config file.
CROSS = alpha-linux
ARCH = alpha
# arm
TEST_START IF ${RUN} == arm || ${DO_DEFAULT}
CROSS = arm-unknown-linux-gnueabi
ARCH = arm
# black fin
TEST_START IF ${RUN} == bfin || ${DO_DEFAULT}
CROSS = bfin-uclinux
ARCH = blackfin
BUILD_OPTIONS = -j8 vmlinux
# cris - FAILS?
TEST_START IF ${RUN} == cris || ${RUN} == cris64 || ${DO_FAILED}
CROSS = cris-linux
ARCH = cris
# cris32 - not right arch?
TEST_START IF ${RUN} == cris || ${RUN} == cris32 || ${DO_FAILED}
CROSS = crisv32-linux
ARCH = cris
# ia64
TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT}
CROSS = ia64-linux
ARCH = ia64
# frv
TEST_START IF ${RUN} == frv || ${DO_FAILED}
CROSS = frv-linux
ARCH = frv
GCC_VER = 4.5.1
# m68k fails with error?
TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
CROSS = m68k-linux
ARCH = m68k
# mips64
TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT}
CROSS = mips64-linux
ARCH = mips
# mips32
TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT}
CROSS = mips-linux
ARCH = mips
# m32r
TEST_START IF ${RUN} == m32r || ${DO_FAILED}
CROSS = m32r-linux
ARCH = m32r
GCC_VER = 4.5.1
BUILD_OPTIONS = -j8 vmlinux
# parisc64 failed?
TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED}
CROSS = hppa64-linux
ARCH = parisc
# parisc
TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED}
CROSS = hppa-linux
ARCH = parisc
# ppc
TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT}
CROSS = powerpc-linux
ARCH = powerpc
# ppc64
TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT}
CROSS = powerpc64-linux
ARCH = powerpc
# s390
TEST_START IF ${RUN} == s390 || ${DO_DEFAULT}
CROSS = s390x-linux
ARCH = s390
# sh
TEST_START IF ${RUN} == sh || ${DO_DEFAULT}
CROSS = sh4-linux
ARCH = sh
# sparc64
TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT}
CROSS = sparc64-linux
ARCH = sparc64
# sparc
TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT}
CROSS = sparc-linux
ARCH = sparc
# xtensa failed
TEST_START IF ${RUN} == xtensa || ${DO_FAILED}
CROSS = xtensa-linux
ARCH = xtensa
# UML
TEST_START IF ${RUN} == uml || ${DO_DEFAULT}
MAKE_CMD = make ARCH=um SUBARCH=x86_64
ARCH = uml
CROSS =
TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT}
MAKE_CMD = make ARCH=i386
ARCH = i386
CROSS =
TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT}
MAKE_CMD = make ARCH=x86_64
ARCH = x86_64
CROSS =
#################################
# This is a bisect if needed. You need to give it a MIN_CONFIG that
# will be the config file it uses. Basically, just copy the created defconfig
# for the arch someplace and point MIN_CONFIG to it.
TEST_START IF ${RUN} == bisect
MIN_CONFIG = ${THIS_DIR}/min-config
CROSS = s390x-linux
ARCH = s390
TEST_TYPE = bisect
BISECT_TYPE = build
BISECT_GOOD = v3.1
BISECT_BAD = v3.2
CHECKOUT = v3.2
#################################
# These defaults are needed to keep ktest.pl from complaining. They are
# ignored because the test does not go past the build. No install or
# booting of the target images.
DEFAULTS
MACHINE = crosstest
SSH_USER = root
BUILD_TARGET = cross
TARGET_IMAGE = image
POWER_CYCLE = cycle
CONSOLE = console
LOCALVERSION = version
GRUB_MENU = grub
REBOOT_ON_ERROR = 0
POWEROFF_ON_ERROR = 0
POWEROFF_ON_SUCCESS = 0
REBOOT_ON_SUCCESS = 0

View file

@ -0,0 +1,90 @@
#
# This example shows the bisect tests (git bisect and config bisect)
#
# The config that includes this file may define a RUN_TEST
# variable that will tell this config what test to run.
# (what to set the TEST option to).
#
DEFAULTS IF NOT DEFINED RUN_TEST
# Requires that hackbench is in the PATH
RUN_TEST := ${SSH} hackbench 50
# Set TEST to 'bisect' to do a normal git bisect. You need
# to modify the options below to make it bisect the exact
# commits you are interested in.
#
TEST_START IF ${TEST} == bisect
TEST_TYPE = bisect
# You must set the commit that was considered good (git bisect good)
BISECT_GOOD = v3.3
# You must set the commit that was considered bad (git bisect bad)
BISECT_BAD = HEAD
# It's best to specify the branch to checkout before starting the bisect.
CHECKOUT = origin/master
# This can be build, boot, or test. Here we are doing a bisect
# that requires running a test to know if the bisect was good or bad.
# The test should exit with 0 on good, non-zero for bad. But see
# the BISECT_RET_* options in sample.conf to override this.
BISECT_TYPE = test
TEST = ${RUN_TEST}
# It is usually a good idea to confirm that the GOOD and the BAD
# commits are truly good and bad respectively. Having BISECT_CHECK
# set to 1 will check both that the good commit works and the bad
# commit fails. If you only want to check one or the other,
# set BISECT_CHECK to 'good' or to 'bad'.
BISECT_CHECK = 1
#BISECT_CHECK = good
#BISECT_CHECK = bad
# Usually it's a good idea to specify the exact config you
# want to use throughout the entire bisect. Here we placed
# it in the directory we called ktest.pl from and named it
# 'config-bisect'.
MIN_CONFIG = ${THIS_DIR}/config-bisect
# By default, if we are doing a BISECT_TYPE = test run but the
# build or boot fails, ktest.pl will do a 'git bisect skip'.
# Uncomment the below option to make ktest stop testing on such
# an error.
#BISECT_SKIP = 0
# Now if you had BISECT_SKIP = 0 and the test fails, you can
# examine what happened and then do 'git bisect log > /tmp/replay'
# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the
# 'git bisect replay /tmp/replay' before continuing the bisect test.
#BISECT_REPLAY = /tmp/replay
# If you used BISECT_REPLAY after the bisect test failed, you may
# not want to continue the bisect on that commit that failed.
# By setting BISECT_START to a new commit, ktest.pl will check out
# that commit after it has performed the 'git bisect replay' but
# before it continues running the bisect test.
#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9
# Now if you don't trust ktest.pl to make the decisions for you, then
# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide
# if the commit was good or bad. Instead, it will ask you to tell
# it if the current commit was good. In the mean time, you could
# take the result, load it on any machine you want. Run several tests,
# or whatever you feel like. Then, when you are happy, you can tell
# ktest if you think it was good or not and ktest.pl will continue
# the git bisect. You can even change what commit it is currently at.
#BISECT_MANUAL = 1
# One of the unique tests that ktest does is the config bisect.
# Currently (which hopefully will be fixed soon), the bad config
# must be a superset of the good config. This is because it only
# searches for a config that causes the target to fail. If the
# good config is not a subset of the bad config, or if the target
# fails because of a lack of a config, then it will not find
# the config for you.
TEST_START IF ${TEST} == config-bisect
TEST_TYPE = config_bisect
# set to build, boot, test
CONFIG_BISECT_TYPE = boot
# Set the config that is considered bad.
CONFIG_BISECT = ${THIS_DIR}/config-bad
# This config is optional. By default it uses the
# MIN_CONFIG as the good config.
CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good

View file

@ -0,0 +1,157 @@
# This file holds defaults for most the tests. It defines the options that
# are most common to tests that are likely to be shared.
#
# Note, after including this file, a config file may override any option
# with a DEFAULTS OVERRIDE section.
#
# For those cases that use the same machine to boot a 64 bit
# and a 32 bit version. The MACHINE is the DNS name to get to the
# box (usually different if it was 64 bit or 32 bit) but the
# BOX here is defined as a variable that will be the name of the box
# itself. It is useful for calling scripts that will power cycle
# the box, as only one script needs to be created to power cycle
# even though the box itself has multiple operating systems on it.
# By default, BOX and MACHINE are the same.
DEFAULTS IF NOT DEFINED BOX
BOX := ${MACHINE}
# Consider each box as 64 bit box, unless the config including this file
# has defined BITS = 32
DEFAULTS IF NOT DEFINED BITS
BITS := 64
DEFAULTS
# THIS_DIR is used through out the configs and defaults to ${PWD} which
# is the directory that ktest.pl was called from.
THIS_DIR := ${PWD}
# to organize your configs, having each machine save their configs
# into a separate directory is useful.
CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE}
# Reset the log before running each test.
CLEAR_LOG = 1
# As installing kernels usually requires root privilege, default the
# user on the target as root. It is also required that the target
# allows ssh to root from the host without asking for a password.
SSH_USER = root
# For accessing the machine, we will ssh to root@machine.
SSH := ssh ${SSH_USER}@${MACHINE}
# Update this. The default here is ktest will ssh to the target box
# and run a script called 'run-test' located on that box.
TEST = ${SSH} run-test
# Point build dir to the git repo you use
BUILD_DIR = ${THIS_DIR}/linux.git
# Each machine will have its own output build directory.
OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE}
# Yes this config is focused on x86 (but ktest works for other archs too)
BUILD_TARGET = arch/x86/boot/bzImage
TARGET_IMAGE = /boot/vmlinuz-test
# have directory for the scripts to reboot and power cycle the boxes
SCRIPTS_DIR := ${THIS_DIR}/scripts
# You can have each box/machine have a script to power cycle it.
# Name your script <box>-cycle.
POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle
# This script is used to power off the box.
POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff
# Keep your test kernels separate from your other kernels.
LOCALVERSION = -test
# The /boot/grub/menu.lst is searched for the line:
# title Test Kernel
# and ktest will use that kernel to reboot into.
# For grub2 or other boot loaders, you need to set BOOT_TYPE
# to 'script' and define other ways to load the kernel.
# See snowball.conf example.
#
GRUB_MENU = Test Kernel
# The kernel build will use this option.
BUILD_OPTIONS = -j8
# Keeping the log file with the output dir is convenient.
LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log
# Each box should have its own minimum configuration
# See min-config.conf
MIN_CONFIG = ${CONFIG_DIR}/config-min
# For things like randconfigs, there may be configs you find that
# are already broken, or there may be some configs that you always
# want set. Uncomment ADD_CONFIG and point it to the make config files
# that set the configs you want to keep on (or off) in your build.
# ADD_CONFIG is usually something to add configs to all machines,
# where as, MIN_CONFIG is specific per machine.
#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general
# To speed up reboots for bisects and patchcheck, instead of
# waiting 60 seconds for the console to be idle, if this line is
# seen in the console output, ktest will know the good kernel has
# finished rebooting and it will be able to continue the tests.
REBOOT_SUCCESS_LINE = ${MACHINE} login:
# The following are different ways to end the test.
# by setting the variable REBOOT to: none, error, fail or
# something else, ktest will power cycle or reboot the target box
# at the end of the tests.
#
# REBOOT := none
# Don't do anything at the end of the test.
#
# REBOOT := error
# Reboot the box if ktest detects an error
#
# REBOOT := fail
# Do not stop on failure, and after all tests are complete
# power off the box (for both success and error)
# This is good to run over a weekend and you don't want to waste
# electricity.
#
DEFAULTS IF ${REBOOT} == none
REBOOT_ON_SUCCESS = 0
REBOOT_ON_ERROR = 0
POWEROFF_ON_ERROR = 0
POWEROFF_ON_SUCCESS = 0
DEFAULTS ELSE IF ${REBOOT} == error
REBOOT_ON_SUCCESS = 0
REBOOT_ON_ERROR = 1
POWEROFF_ON_ERROR = 0
POWEROFF_ON_SUCCESS = 0
DEFAULTS ELSE IF ${REBOOT} == fail
REBOOT_ON_SUCCESS = 0
POWEROFF_ON_ERROR = 1
POWEROFF_ON_SUCCESS = 1
POWEROFF_AFTER_HALT = 120
DIE_ON_FAILURE = 0
# Store the failure information into this directory
# such as the .config, dmesg, and build log.
STORE_FAILURES = ${THIS_DIR}/failures
DEFAULTS ELSE
REBOOT_ON_SUCCESS = 1
REBOOT_ON_ERROR = 1
POWEROFF_ON_ERROR = 0
POWEROFF_ON_SUCCESS = 0

View file

@ -0,0 +1,60 @@
#
# This file has some examples for creating a MIN_CONFIG.
# (A .config file that is the minimum for a machine to boot, or
# to boot and make a network connection.)
#
# A MIN_CONFIG is very useful as it is the minimum configuration
# needed to boot a given machine. You can debug someone else's
# .config by only setting the configs in your MIN_CONFIG. The closer
# your MIN_CONFIG is to the true minimum set of configs needed to
# boot your machine, the closer the config you test with will be
# to the user's config that had the failure.
#
# The make_min_config test allows you to create a MIN_CONFIG that
# is truly the minimum set of configs needed to boot a box.
#
# In this example, the final config will reside in
# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net.
# Just move one to the location you have set for MIN_CONFIG.
#
# The first test creates a MIN_CONFIG that will be the minimum
# configuration to boot ${MACHINE} and be able to ssh to it.
#
# The second test creates a MIN_CONFIG that will only boot
# the target and most likely will not let you ssh to it. (Notice
# how the second test uses the first test's result to continue with.
# This is because the second test config is a subset of the first).
#
# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs
# that ktest.pl found would not boot the target without them set.
# The config-new-min holds configs that ktest.pl could not test
# directly because another config that was needed to boot the box
# selected them. Sometimes it is possible that this file will hold
# the true minimum configuration. You can test to see if this is
# the case by running the boot test with BOOT_TYPE = allnoconfig and
# setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the
# machine still boots, then you can use the config-skip as your MIN_CONFIG.
#
# These tests can run for several hours (and perhaps days).
# It's OK to kill the test with a Ctrl^C. By restarting without
# modifying this config, ktest.pl will notice that the config-new-min(-net)
# exists, and will use that instead as the starting point.
# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking
# you if you want to use the OUTPUT_MIN_CONFIG as the starting point.
# Using the OUTPUT_MIN_CONFIG as the starting point will allow ktest.pl to
# start almost where it left off.
#
TEST_START IF ${TEST} == min-config
TEST_TYPE = make_min_config
OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net
MIN_CONFIG_TYPE = test
TEST = ${SSH} echo hi
USE_OUTPUT_MIN_CONFIG = 1
TEST_START IF ${TEST} == min-config && ${MULTI}
TEST_TYPE = make_min_config
OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min
IGNORE_CONFIG = ${CONFIG_DIR}/config-skip
MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
USE_OUTPUT_MIN_CONFIG = 1

View file

@ -0,0 +1,111 @@
# patchcheck.conf
#
# This contains a test that takes two git commits and will test each
# commit between the two. The build test will look at what files the
# commit has touched, and if any of those files produce a warning, then
# the build will fail.
# PATCH_START is the commit to begin with and PATCH_END is the commit
# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1
# and then testing each commit and doing a git rebase --continue.
# You can use a SHA1, a git tag, or anything that git will accept for a checkout
PATCH_START := HEAD~3
PATCH_END := HEAD
# Use the oldconfig if build_type wasn't defined
DEFAULTS IF NOT DEFINED BUILD_TYPE
DO_BUILD_TYPE := oldconfig
DEFAULTS ELSE
DO_BUILD_TYPE := ${BUILD_TYPE}
DEFAULTS
# Change PATCH_CHECKOUT to be the branch you want to test. The test will
# do a git checkout of this branch before starting. Obviously both
# PATCH_START and PATCH_END must be in this branch (and PATCH_START must
# be contained by PATCH_END).
PATCH_CHECKOUT := test/branch
# Usually it's a good idea to have a set config to use for testing individual
# patches.
PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck
# Change PATCH_TEST to run some test for each patch. Each commit that is
# tested, after it is built and installed on the test machine, this command
# will be executed. Usually what is done is to ssh to the target box and
# run some test scripts. If you just want to boot test your patches
# comment PATCH_TEST out.
PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script"
DEFAULTS IF DEFINED PATCH_TEST
PATCH_TEST_TYPE := test
DEFAULTS ELSE
PATCH_TEST_TYPE := boot
# If for some reason a file has a warning that one of your patches touches
# but you do not care about it, set IGNORE_WARNINGS to that commit(s)
# (space delimited)
#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce
# Instead of just checking for warnings to files that are changed
# it can be advantageous to check for any new warnings. If a
# header file is changed, it could cause a warning in a file not
# touched by the commit. To detect these kinds of warnings, you
# can use the WARNINGS_FILE option.
#
# If the variable CREATE_WARNINGS_FILE is set, this config will
# enable the WARNINGS_FILE during the patchcheck test. Also,
# before running the patchcheck test, it will create the
# warnings file.
#
DEFAULTS IF DEFINED CREATE_WARNINGS_FILE
WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
TEST_START IF DEFINED CREATE_WARNINGS_FILE
# WARNINGS_FILE is already set by the DEFAULTS above
TEST_TYPE = make_warnings_file
# Checkout the commit before the patches to test,
# and record all the warnings that exist before the patches
# to test are added
CHECKOUT = ${PATCHCHECK_START}~1
# Force a full build
BUILD_NOCLEAN = 0
BUILD_TYPE = ${DO_BUILD_TYPE}
# If you are running a multi test, and the first test failed on,
# say, the 5th patch, and you want to restart on the
# fifth patch, set PATCH_START1. This will make the first test start
# from this commit instead of the PATCH_START commit.
# Note, do not change this option. Just define PATCH_START1 in the
# top config (the one you pass to ktest.pl), and this will use it,
# otherwise it will just use PATCH_START if PATCH_START1 is not defined.
DEFAULTS IF NOT DEFINED PATCH_START1
PATCH_START1 := ${PATCH_START}
TEST_START IF ${TEST} == patchcheck
TEST_TYPE = patchcheck
MIN_CONFIG = ${PATCH_CONFIG}
TEST = ${PATCH_TEST}
PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
PATCHCHECK_START = ${PATCH_START1}
PATCHCHECK_END = ${PATCH_END}
CHECKOUT = ${PATCH_CHECKOUT}
BUILD_TYPE = ${DO_BUILD_TYPE}
TEST_START IF ${TEST} == patchcheck && ${MULTI}
TEST_TYPE = patchcheck
MIN_CONFIG = ${PATCH_CONFIG}
TEST = ${PATCH_TEST}
PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
PATCHCHECK_START = ${PATCH_START}
PATCHCHECK_END = ${PATCH_END}
CHECKOUT = ${PATCH_CHECKOUT}
# Use multi to test different compilers?
MAKE_CMD = CC=gcc-4.5.1 make
BUILD_TYPE = ${DO_BUILD_TYPE}

View file

@ -0,0 +1,74 @@
#
# This is an example of various tests that you can run
#
# The variable TEST can be of boot, build, randconfig, or test.
#
# Note that TEST is a variable created with ':=' and only exists
# throughout the config processing (not during the tests themselves).
#
# The TEST option (defined with '=') is used to tell ktest.pl
# what test to run after a successful boot. The TEST option is
# persistent into the test runs.
#
# The config that includes this file may define a BOOT_TYPE
# variable that tells this config what type of boot test to run.
# If it's not defined, the below DEFAULTS will set the default
# to 'oldconfig'.
#
DEFAULTS IF NOT DEFINED BOOT_TYPE
BOOT_TYPE := oldconfig
# The config that includes this file may define a RUN_TEST
# variable that will tell this config what test to run.
# (what to set the TEST option to).
#
DEFAULTS IF NOT DEFINED RUN_TEST
# Requires that hackbench is in the PATH
RUN_TEST := ${SSH} hackbench 50
# If TEST is set to 'boot' then just build a kernel and boot
# the target.
TEST_START IF ${TEST} == boot
TEST_TYPE = boot
# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable.
BUILD_TYPE = ${BOOT_TYPE}
# Do not do a make mrproper.
BUILD_NOCLEAN = 1
# If you only want to build the kernel, and perhaps install
# and test it yourself, then just set TEST to build.
TEST_START IF ${TEST} == build
TEST_TYPE = build
BUILD_TYPE = ${BOOT_TYPE}
BUILD_NOCLEAN = 1
# Build, install, boot and test with a randconfig 10 times.
# It is important that you have set MIN_CONFIG in the config
# that includes this file otherwise it is likely that the
# randconfig will not have the necessary configs needed to
# boot your box. This version of the test requires a min
# config that has enough to make sure the target has network
# working.
TEST_START ITERATE 10 IF ${TEST} == randconfig
MIN_CONFIG = ${CONFIG_DIR}/config-min-net
TEST_TYPE = test
BUILD_TYPE = randconfig
TEST = ${RUN_TEST}
# This is the same as above, but only tests to a boot prompt.
# The MIN_CONFIG used here does not need to have networking
# working.
TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI}
TEST_TYPE = boot
BUILD_TYPE = randconfig
MIN_CONFIG = ${CONFIG_DIR}/config-min
MAKE_CMD = make
# This builds, installs, boots and tests the target.
TEST_START IF ${TEST} == test
TEST_TYPE = test
BUILD_TYPE = ${BOOT_TYPE}
TEST = ${RUN_TEST}
BUILD_NOCLEAN = 1

View file

@ -0,0 +1,92 @@
#
# This config is an example usage of ktest.pl with a kvm guest
#
# The guest is called 'Guest' and this would be something that
# could be run on the host to test a virtual machine target.
MACHINE = Guest
# Use virsh to read the serial console of the guest
CONSOLE = virsh console ${MACHINE}
# Use SIGKILL to terminate virsh console. We can't kill virsh console
# by the default signal, SIGINT.
CLOSE_CONSOLE_SIGNAL = KILL
#*************************************#
# This part is the same as test.conf #
#*************************************#
# The include files will set up the type of test to run. Just set TEST to
# which test you want to run.
#
# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
#
# See the include/*.conf files that define these tests
#
TEST := patchcheck
# Some tests may have more than one test to run. Define MULTI := 1 to run
# the extra tests.
MULTI := 0
# In case you want to differentiate which type of system you are testing
BITS := 64
# REBOOT = none, error, fail, empty
# See include/defaults.conf
REBOOT := empty
# The defaults file will set up various settings that can be used by all
# machine configs.
INCLUDE include/defaults.conf
#*************************************#
# Now we are different from test.conf #
#*************************************#
# The example here assumes that Guest is running a Fedora release
# that uses dracut for its initfs. The POST_INSTALL will be executed
# after the install of the kernel and modules are complete.
#
POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
# Guests sometimes get stuck on reboot. We wait 3 seconds after running
# the reboot command and then do a full power-cycle of the guest.
# This forces the guest to restart.
#
POWERCYCLE_AFTER_REBOOT = 3
# We do the same after the halt command, but this time we wait 20 seconds.
POWEROFF_AFTER_HALT = 20
# As the defaults.conf file has a POWER_CYCLE option already defined,
# and options can not be defined in the same section more than once
# (all DEFAULTS sections are considered the same), we use
# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previously defined
# options, for the options set in the OVERRIDE section.
#
DEFAULTS OVERRIDE
# Instead of using the default POWER_CYCLE option defined in
# defaults.conf, we use virsh to cycle it. To do so, we destroy
# the guest, wait 5 seconds, and then start it up again.
# Crude, but effective.
#
POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE}
DEFAULTS
# The following files each handle a different test case.
# Having them included allows you to set up more than one machine and share
# the same tests.
INCLUDE include/patchcheck.conf
INCLUDE include/tests.conf
INCLUDE include/bisect.conf
INCLUDE include/min-config.conf

View file

@ -0,0 +1,53 @@
# This example was used to boot the snowball ARM board.
# See http://people.redhat.com/srostedt/ktest-embedded-2012/
# PWD is a ktest.pl variable that will result in the process working
# directory that ktest.pl is executed in.
# THIS_DIR is automatically assigned the PWD of the path that generated
# the config file. It is best to use this variable when assigning other
# directory paths within this directory. This allows you to easily
# move the test cases to other locations or to other machines.
#
THIS_DIR := /home/rostedt/work/demo/ktest-embed
LOG_FILE = ${OUTPUT_DIR}/snowball.log
CLEAR_LOG = 1
MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm
ADD_CONFIG = ${THIS_DIR}/addconfig
SCP_TO_TARGET = echo "don't do scp"
TFTPBOOT := /var/lib/tftpboot
TFTPDEF := ${TFTPBOOT}/snowball-default
TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET}
SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE}
SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE}
# Define each test with TEST_START
# The config options below it will override the defaults
TEST_START SKIP
TEST_TYPE = boot
BUILD_TYPE = u8500_defconfig
BUILD_NOCLEAN = 1
TEST_START
TEST_TYPE = make_min_config
OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin
START_MIN_CONFIG = ${THIS_DIR}/config.orig
IGNORE_CONFIG = ${THIS_DIR}/config.ignore
BUILD_NOCLEAN = 1
DEFAULTS
LOCALVERSION = -test
POWER_CYCLE = echo use the thumb luke; read a
CONSOLE = cat ${THIS_DIR}/snowball-cat
REBOOT_TYPE = script
SSH_USER = root
BUILD_OPTIONS = -j8 uImage
BUILD_DIR = ${THIS_DIR}/linux.git
OUTPUT_DIR = ${THIS_DIR}/snowball-build
MACHINE = snowball
TARGET_IMAGE = /var/lib/tftpboot/snowball-image
BUILD_TARGET = arch/arm/boot/uImage

View file

@ -0,0 +1,62 @@
#
# Generic config for a machine
#
# Name your machine (the DNS name, what you ssh to)
MACHINE = foo
# BOX can be different than foo, if the machine BOX has
# multiple partitions with different systems installed. For example,
# you may have a i386 and x86_64 installation on a test box.
# If this is the case, MACHINE defines the way to connect to the
# machine, which may be different between which system the machine
# is booting into. BOX is used for the scripts to reboot and power cycle
# the machine, where it does not matter which system the machine boots into.
#
#BOX := bar
# Define a way to read the console
CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0
# The include files will set up the type of test to run. Just set TEST to
# which test you want to run.
#
# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
#
# See the include/*.conf files that define these tests
#
TEST := patchcheck
# Some tests may have more than one test to run. Define MULTI := 1 to run
# the extra tests.
MULTI := 0
# In case you want to differentiate which type of system you are testing
BITS := 64
# REBOOT = none, error, fail, empty
# See include/defaults.conf
REBOOT := empty
# The defaults file will set up various settings that can be used by all
# machine configs.
INCLUDE include/defaults.conf
# In case you need to add a patch for a bisect or something
#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch
# Reset the repo after the build and remove all 'test' modules from the target
# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD
# is the option (defined by '=')
DO_POST_BUILD := git reset --hard
POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD}
# The following files each handle a different test case.
# Having them included allows you to set up more than one machine and share
# the same tests.
INCLUDE include/patchcheck.conf
INCLUDE include/tests.conf
INCLUDE include/bisect.conf
INCLUDE include/min-config.conf

4175
tools/testing/ktest/ktest.pl Executable file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,50 @@
TARGETS = breakpoints
TARGETS += cpu-hotplug
TARGETS += efivarfs
TARGETS += kcmp
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mqueue
TARGETS += mount
TARGETS += net
TARGETS += ptrace
TARGETS += timers
TARGETS += vm
TARGETS += powerpc
TARGETS += user
TARGETS += sysctl
TARGETS += firmware
TARGETS += ftrace
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
# Every rule below walks a target list and recurses into each selftest
# directory.  $(MAKE) is used instead of a literal "make" so that the
# sub-makes inherit command-line variables and flags such as -j and -n.

# Build every selftest listed in TARGETS.
all:
	for TARGET in $(TARGETS); do \
		$(MAKE) -C $$TARGET; \
	done;

# Build, then run the "run_tests" target of every selftest in TARGETS.
run_tests: all
	for TARGET in $(TARGETS); do \
		$(MAKE) -C $$TARGET run_tests; \
	done;

# Build only the hotplug selftests (TARGETS_HOTPLUG).
hotplug:
	for TARGET in $(TARGETS_HOTPLUG); do \
		$(MAKE) -C $$TARGET; \
	done;

# Run the full-range hotplug tests ("run_full_test" in each directory).
run_hotplug: hotplug
	for TARGET in $(TARGETS_HOTPLUG); do \
		$(MAKE) -C $$TARGET run_full_test; \
	done;

# Clean only the hotplug selftests.
clean_hotplug:
	for TARGET in $(TARGETS_HOTPLUG); do \
		$(MAKE) -C $$TARGET clean; \
	done;

# Clean every selftest listed in TARGETS.
clean:
	for TARGET in $(TARGETS); do \
		$(MAKE) -C $$TARGET clean; \
	done;

View file

@ -0,0 +1,61 @@
Linux Kernel Selftests
The kernel contains a set of "self tests" under the tools/testing/selftests/
directory. These are intended to be small unit tests to exercise individual
code paths in the kernel.
On some systems, hot-plug tests could hang forever waiting for cpu and
memory to be ready to be offlined. A special hot-plug target is created
to run full range of hot-plug tests. In default mode, hot-plug tests run
in safe mode with a limited scope. In limited mode, cpu-hotplug test is
run on a single cpu as opposed to all hotplug capable cpus, and memory
hotplug test is run on 2% of hotplug capable memory instead of 10%.
Running the selftests (hotplug tests are run in limited mode)
=============================================================
To build the tests:
$ make -C tools/testing/selftests
To run the tests:
$ make -C tools/testing/selftests run_tests
- note that some tests will require root privileges.
To run only tests targeted for a single subsystem: (including
hotplug targets in limited mode)
$ make -C tools/testing/selftests TARGETS=cpu-hotplug run_tests
See the top-level tools/testing/selftests/Makefile for the list of all possible
targets.
Running the full range hotplug selftests
========================================
To build the tests:
$ make -C tools/testing/selftests hotplug
To run the tests:
$ make -C tools/testing/selftests run_hotplug
- note that some tests will require root privileges.
Contributing new tests
======================
In general, the rules for selftests are
* Do as much as you can if you're not root;
* Don't take too long;
* Don't break the build on any architecture, and
* Don't cause the top-level "make run_tests" to fail if your feature is
unconfigured.

View file

@ -0,0 +1,23 @@
# Taken from perf makefile
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
ifeq ($(ARCH),i386)
ARCH := x86
endif
ifeq ($(ARCH),x86_64)
ARCH := x86
endif
all:
ifeq ($(ARCH),x86)
gcc breakpoint_test.c -o breakpoint_test
else
echo "Not an x86 target, can't build breakpoints selftests"
endif
run_tests:
@./breakpoint_test || echo "breakpoints selftests: [FAIL]"
clean:
rm -fr breakpoint_test

View file

@ -0,0 +1,394 @@
/*
* Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
*
* Licensed under the terms of the GNU GPL License version 2
*
* Selftests for breakpoints (and more generally the do_debug() path) in x86.
*/
#include <sys/ptrace.h>
#include <unistd.h>
#include <stddef.h>
#include <sys/user.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
/*
 * Breakpoint access modes.
 *
 * These are the test program's own identifiers; toggle_breakpoint()
 * translates them into the type bits it writes to debug register 7.
 */
enum {
BP_X = 1,	/* break on instruction execution */
BP_RW = 2,	/* break on read or write access */
BP_W = 4,	/* break on write access */
};
static pid_t child_pid;
/*
* Ensures the child and parent are always "talking" about
* the same test sequence. (ie: that we haven't forgotten
* to call check_trapped() somewhere).
*/
static int nr_tests;
/*
 * Store @addr in debug register slot @n of the traced child.
 *
 * The value is poked into the u_debugreg[n] field of the child's
 * struct user area.  Any ptrace failure aborts the whole test program.
 */
static void set_breakpoint_addr(void *addr, int n)
{
	int ret;

	ret = ptrace(PTRACE_POKEUSER, child_pid,
		     offsetof(struct user, u_debugreg[n]), addr);
	if (ret) {
		/* perror() appends ": <error text>\n" itself, so no newline here. */
		perror("Can't set breakpoint addr");
		exit(-1);
	}
}
/*
 * Enable or disable breakpoint slot @n in the child's debug register 7.
 *
 * @n:      breakpoint slot index
 * @type:   one of BP_X, BP_W or BP_RW
 * @len:    watched length in bytes: 1, 2, 4 or 8
 * @local:  non-zero to include the per-slot local enable bit (and bit 8)
 * @global: non-zero to include the per-slot global enable bit (and bit 9)
 * @set:    non-zero to OR the bits into dr7, zero to clear them
 *
 * The current dr7 value is read from the child, the bit pattern for this
 * slot is built, and the result is written back.  An unsupported @type or
 * @len, or a failing write, terminates the test program.
 */
static void toggle_breakpoint(int n, int type, int len,
			      int local, int global, int set)
{
	int ret;
	int xtype, xlen;
	unsigned long vdr7, dr7;

	switch (type) {
	case BP_X:
		xtype = 0;
		break;
	case BP_W:
		xtype = 1;
		break;
	case BP_RW:
		xtype = 3;
		break;
	default:
		/* Never build dr7 bits from an uninitialized encoding. */
		fprintf(stderr, "Invalid breakpoint type %d\n", type);
		exit(-1);
	}

	switch (len) {
	case 1:
		xlen = 0;
		break;
	case 2:
		xlen = 4;
		break;
	case 4:
		xlen = 0xc;
		break;
	case 8:
		xlen = 8;
		break;
	default:
		fprintf(stderr, "Invalid breakpoint length %d\n", len);
		exit(-1);
	}

	dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
		     offsetof(struct user, u_debugreg[7]), 0);

	/* Type/length nibble for slot n lives at bit 16 + 4 * n. */
	vdr7 = (xlen | xtype) << 16;
	vdr7 <<= 4 * n;

	if (local) {
		vdr7 |= 1 << (2 * n);
		vdr7 |= 1 << 8;
	}
	if (global) {
		vdr7 |= 2 << (2 * n);
		vdr7 |= 1 << 9;
	}

	if (set)
		dr7 |= vdr7;
	else
		dr7 &= ~vdr7;

	ret = ptrace(PTRACE_POKEUSER, child_pid,
		     offsetof(struct user, u_debugreg[7]), dr7);
	if (ret) {
		perror("Can't set dr7");
		exit(-1);
	}
}
/* Four data slots that the watchpoint tests read from and write to. */
static unsigned long long dummy_var[4];

/* Four empty functions whose entry points serve as execution breakpoints. */
static void dummy_func(void) { }
static void dummy_func1(void) { }
static void dummy_func2(void) { }
static void dummy_func3(void) { }

/* Table of the functions above, indexed by breakpoint slot. */
static void (*dummy_funcs[])(void) = {
	&dummy_func,
	&dummy_func1,
	&dummy_func2,
	&dummy_func3,
};
/*
 * Set to 1 in the child's memory by the parent (check_success() pokes it
 * with PTRACE_POKEDATA) once the parent has seen the expected SIGTRAP.
 */
static int trapped;
/*
 * Called by the child after every access that should have trapped.
 *
 * Raises SIGUSR1 on itself when the parent did not mark the trap as seen,
 * then resets the flag and advances the test counter, which the parent
 * compares against its own nr_tests.
 */
static void check_trapped(void)
{
/*
* If we haven't trapped, wake up the parent
* so that it notices the failure.
*/
if (!trapped)
kill(getpid(), SIGUSR1);
/* Re-arm for the next test and keep the sequence number in step. */
trapped = 0;
nr_tests++;
}
/*
 * Perform one @len-byte store to each of the four dummy_var slots and
 * call check_trapped() after every store.
 *
 * @len: store width in bytes (1, 2, 4 or 8); other values store nothing
 *       but still call check_trapped().
 *
 * The stores go through volatile-qualified pointers so the compiler must
 * emit each one even when building with optimization; the watchpoint test
 * depends on the memory access actually happening.
 */
static void write_var(int len)
{
	int i;

	for (i = 0; i < 4; i++) {
		void *target = &dummy_var[i];

		switch (len) {
		case 1:
			*(volatile char *)target = (char)0xff;
			break;
		case 2:
			*(volatile short *)target = (short)0xffff;
			break;
		case 4:
			*(volatile int *)target = 0xffffffff;
			break;
		case 8:
			*(volatile long long *)target = 0xffffffffffffffffLL;
			break;
		}
		check_trapped();
	}
}
/*
 * Perform one @len-byte load from each of the four dummy_var slots and
 * call check_trapped() after every load.
 *
 * @len: load width in bytes (1, 2, 4 or 8); other values load nothing
 *       but still call check_trapped().
 *
 * The loaded value is not needed, only the access itself.  Reading
 * through a volatile-qualified pointer keeps the compiler from discarding
 * the load, and avoids the set-but-unused temporaries.
 */
static void read_var(int len)
{
	int i;

	for (i = 0; i < 4; i++) {
		const void *src = &dummy_var[i];

		switch (len) {
		case 1:
			(void)*(const volatile char *)src;
			break;
		case 2:
			(void)*(const volatile short *)src;
			break;
		case 4:
			(void)*(const volatile int *)src;
			break;
		case 8:
			(void)*(const volatile long long *)src;
			break;
		}
		check_trapped();
	}
}
/*
 * Child side of the test: do the r/w/x accesses to trigger the
 * breakpoints, then run the usual traps.
 *
 * The child asks to be traced, stops itself with SIGUSR1 so the parent can
 * arm the first breakpoint, and then performs the accesses in the same
 * order in which launch_tests() arms them: instruction breakpoints, write
 * watchpoints, read watchpoints, icebp and int 3.  A final SIGUSR1 marks
 * the end of the sequence.
 */
static void trigger_tests(void)
{
	int len, local, global, i;
	int ret;

	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
	if (ret) {
		/* perror() adds its own newline after the error text. */
		perror("Can't be traced?");
		return;
	}

	/* Wake up father so that it sets up the first test */
	kill(getpid(), SIGUSR1);

	/* Test instruction breakpoints */
	for (local = 0; local < 2; local++) {
		for (global = 0; global < 2; global++) {
			if (!local && !global)
				continue;

			for (i = 0; i < 4; i++) {
				dummy_funcs[i]();
				check_trapped();
			}
		}
	}

	/* Test write watchpoints */
	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
		for (local = 0; local < 2; local++) {
			for (global = 0; global < 2; global++) {
				if (!local && !global)
					continue;
				write_var(len);
			}
		}
	}

	/* Test read/write watchpoints (on read accesses) */
	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
		for (local = 0; local < 2; local++) {
			for (global = 0; global < 2; global++) {
				if (!local && !global)
					continue;
				read_var(len);
			}
		}
	}

	/* Icebp trap */
	asm(".byte 0xf1\n");
	check_trapped();

	/* Int 3 trap */
	asm("int $3\n");
	check_trapped();

	kill(getpid(), SIGUSR1);
}
/*
 * Parent side: wait for the child's next stop and report one test result.
 *
 * @msg: description of the test, printed followed by "[Ok]" or "[Failed]".
 *
 * The test passes when the child stopped with SIGTRAP and its nr_tests
 * counter (read from the child's memory) matches ours.  On a SIGTRAP stop
 * the child's "trapped" flag is set to 1 so that its check_trapped() sees
 * the trap was observed.  Our own counter advances either way.
 */
static void check_success(const char *msg)
{
	const char *msg2 = "Failed";
	int child_nr_tests;
	int status;

	/* Wait for the child to SIGTRAP */
	if (wait(&status) < 0) {
		perror("Can't wait for the child");
		exit(-1);
	}

	/* WSTOPSIG() is only meaningful when the child is actually stopped. */
	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
		child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
					&nr_tests, 0);
		if (child_nr_tests == nr_tests)
			msg2 = "Ok";
		if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) {
			perror("Can't poke");
			exit(-1);
		}
	}

	nr_tests++;

	printf("%s [%s]\n", msg, msg2);
}
/*
 * Exercise an execution breakpoint on each of the four dummy functions.
 *
 * For every slot: point the slot at the function, enable it with the given
 * local/global bits, resume the child, report the outcome through
 * check_success() and disable the slot again.  @buf is scratch space for
 * the result message.
 */
static void launch_instruction_breakpoints(char *buf, int local, int global)
{
	int slot = 0;

	while (slot < 4) {
		sprintf(buf, "Test breakpoint %d with local: %d global: %d",
			slot, local, global);

		set_breakpoint_addr(dummy_funcs[slot], slot);
		toggle_breakpoint(slot, BP_X, 1, local, global, 1);
		ptrace(PTRACE_CONT, child_pid, NULL, 0);
		check_success(buf);
		toggle_breakpoint(slot, BP_X, 1, local, global, 0);

		slot++;
	}
}
/*
 * Exercise a data watchpoint of @len bytes on each of the four dummy_var
 * slots.
 *
 * @mode selects the access type; BP_W is reported as "write", anything
 * else as "read".  For every slot the watchpoint is armed, the child is
 * resumed, the outcome is reported and the watchpoint is disarmed.  @buf
 * is scratch space for the result message.
 */
static void launch_watchpoints(char *buf, int mode, int len,
			       int local, int global)
{
	const char *mode_str = (mode == BP_W) ? "write" : "read";
	int slot = 0;

	while (slot < 4) {
		sprintf(buf, "Test %s watchpoint %d with len: %d local: "
			"%d global: %d", mode_str, slot, len, local, global);

		set_breakpoint_addr(&dummy_var[slot], slot);
		toggle_breakpoint(slot, mode, len, local, global, 1);
		ptrace(PTRACE_CONT, child_pid, NULL, 0);
		check_success(buf);
		toggle_breakpoint(slot, mode, len, local, global, 0);

		slot++;
	}
}
/*
 * Run launch_watchpoints() in @mode for every access length from 1 up to
 * sizeof(long) (doubling each time) and for every local/global enable
 * combination except "neither enabled".
 */
static void launch_watchpoint_series(char *buf, int mode)
{
	int len, local, global;

	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
		for (local = 0; local < 2; local++) {
			for (global = 0; global < 2; global++) {
				if (!local && !global)
					continue;
				launch_watchpoints(buf, mode, len,
						   local, global);
			}
		}
	}
}

/*
 * Set the breakpoints and check the child successfully triggers them.
 *
 * Parent side of the test.  The order here must mirror the order of the
 * accesses in trigger_tests(): instruction breakpoints, write watchpoints,
 * read-write watchpoints, then the icebp and int 3 traps.  The final
 * PTRACE_CONT lets the child run on after the last trap.
 */
static void launch_tests(void)
{
	char buf[1024];
	int local, global;

	/* Instruction breakpoints */
	for (local = 0; local < 2; local++) {
		for (global = 0; global < 2; global++) {
			if (!local && !global)
				continue;
			launch_instruction_breakpoints(buf, local, global);
		}
	}

	/* Write watchpoint */
	launch_watchpoint_series(buf, BP_W);

	/* Read-Write watchpoint */
	launch_watchpoint_series(buf, BP_RW);

	/* Icebp traps */
	ptrace(PTRACE_CONT, child_pid, NULL, 0);
	check_success("Test icebp");

	/* Int 3 traps */
	ptrace(PTRACE_CONT, child_pid, NULL, 0);
	check_success("Test int 3 trap");

	ptrace(PTRACE_CONT, child_pid, NULL, 0);
}
/*
 * Fork a child that triggers the traps (trigger_tests()) while the parent
 * arms the breakpoints and checks the results (launch_tests()).
 *
 * The first wait() consumes the child's initial SIGUSR1 stop; the last one
 * collects the child's next state change after the tests are done.
 * Command-line arguments are ignored.
 */
int main(int argc, char **argv)
{
	pid_t pid;

	(void)argc;
	(void)argv;

	pid = fork();
	if (pid < 0) {
		/* Without a child there is nothing to trace. */
		perror("Can't fork");
		return EXIT_FAILURE;
	}
	if (!pid) {
		trigger_tests();
		return 0;
	}

	child_pid = pid;

	wait(NULL);

	launch_tests();

	wait(NULL);

	return 0;
}

View file

@ -0,0 +1,9 @@
all:
run_tests:
@/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
run_full_test:
@/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
clean:

View file

@ -0,0 +1,269 @@
#!/bin/bash
SYSFS=
#
# Check that the test can run and print the online/offline CPU summary.
#
# Exits with status 0 (tests skipped) when not run as root, when sysfs is
# not mounted or when no cpu directories exist under sysfs.
#
# Sets the globals SYSFS, online_cpus, online_max, offline_cpus and, when
# any CPU is reported offline, offline_max.  The "online"/"offline" files
# hold cpulists such as "0-3" or "0-3,5", so the highest CPU number is
# whatever follows the last '-' or ','.
#
prerequisite()
{
	msg="skip all tests:"

	if [ $UID != 0 ]; then
		echo $msg must be run as root >&2
		exit 0
	fi

	# Restrict this shell to the CPU affinity mask 01
	taskset -p 01 $$

	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`

	if [ ! -d "$SYSFS" ]; then
		echo $msg sysfs is not mounted >&2
		exit 0
	fi

	if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
		echo $msg cpu hotplug is not supported >&2
		exit 0
	fi

	echo "CPU online/offline summary:"
	online_cpus=`cat $SYSFS/devices/system/cpu/online`
	online_max=${online_cpus##*[,-]}
	echo -e "\t Cpus in online state: $online_cpus"

	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
	if [[ "a$offline_cpus" = "a" ]]; then
		# An empty list means no CPU is offline
		offline_cpus=0
	else
		offline_max=${offline_cpus##*[,-]}
	fi
	echo -e "\t Cpus in offline state: $offline_cpus"
}
#
# List all hot-pluggable CPUs, one CPU number per line.
#
# $1 - optional grep pattern matched against each cpuN/online file
#      (callers pass 0 or 1); defaults to ".*", which matches any state.
#
# A CPU counts as hot-pluggable when its sysfs directory has an "online"
# file.  The pattern and paths are quoted so that the default ".*" is
# handed to grep as-is instead of being expanded against dot files in the
# current directory.
#
hotpluggable_cpus()
{
	local state=${1:-.\*}

	for cpu in $SYSFS/devices/system/cpu/cpu*; do
		if [ -f "$cpu/online" ] && grep -q "$state" "$cpu/online"; then
			echo ${cpu##/*/cpu}
		fi
	done
}
# List hot-pluggable CPUs whose "online" file contains 0.
# NOTE: the name is spelled "hotplaggable" and every caller in this script
# uses that spelling.
hotplaggable_offline_cpus()
{
hotpluggable_cpus 0
}
# List hot-pluggable CPUs whose "online" file contains 1.
hotpluggable_online_cpus()
{
hotpluggable_cpus 1
}
# Succeed when cpu $1 reports 1 in its "online" file.
cpu_is_online()
{
grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
}
# Succeed when cpu $1 reports 0 in its "online" file.
cpu_is_offline()
{
grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
}
# Request cpu $1 to go online; the status is that of the sysfs write.
online_cpu()
{
echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
}
# Request cpu $1 to go offline; the status is that of the sysfs write.
offline_cpu()
{
echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
}
# Bring cpu $1 online and complain on stderr when either the request fails
# or the CPU does not report itself online afterwards.
online_cpu_expect_success()
{
local cpu=$1
if ! online_cpu $cpu; then
echo $FUNCNAME $cpu: unexpected fail >&2
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
fi
}
# Try to bring cpu $1 online while expecting the request to be rejected.
# Complains on stderr when the request succeeds, or when it fails but the
# CPU no longer reports itself offline.
online_cpu_expect_fail()
{
local cpu=$1
# The expected write error is silenced
if online_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
elif ! cpu_is_offline $cpu; then
echo $FUNCNAME $cpu: unexpected online >&2
fi
}
# Take cpu $1 offline and complain on stderr when either the request fails
# or the CPU does not report itself offline afterwards.
offline_cpu_expect_success()
{
	local cpu=$1

	if ! offline_cpu $cpu; then
		echo $FUNCNAME $cpu: unexpected fail >&2
	elif ! cpu_is_offline $cpu; then
		# The request succeeded but the CPU is still online
		echo $FUNCNAME $cpu: unexpected online >&2
	fi
}
# Try to take cpu $1 offline while expecting the request to be rejected.
# Complains on stderr when the request succeeds, or when it fails but the
# CPU no longer reports itself online.
offline_cpu_expect_fail()
{
local cpu=$1
# The expected write error is silenced
if offline_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
fi
}
error=-12
allcpus=0
priority=0
online_cpus=0
online_max=0
offline_cpus=0
offline_max=0
while getopts e:ahp: opt; do
case $opt in
e)
error=$OPTARG
;;
a)
allcpus=1
;;
h)
echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
echo -e "\t default offline one cpu"
echo -e "\t run with -a option to offline all cpus"
exit
;;
p)
priority=$OPTARG
;;
esac
done
if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
echo "error code must be -4095 <= errno < 0" >&2
exit 1
fi
prerequisite
#
# Safe test (default) - offline and online one cpu
#
# Without -a only the highest online CPU is cycled, plus the highest
# offline CPU when there is one, and the script exits afterwards.
#
if [ $allcpus -eq 0 ]; then
	echo "Limited scope test: one hotplug cpu"
	echo -e "\t (leaves cpu in the original state):"
	echo -e "\t online to offline to online: cpu $online_max"
	offline_cpu_expect_success $online_max
	online_cpu_expect_success $online_max

	# offline_cpus is "0" when no CPU is offline, otherwise a cpulist
	# such as "4-7".  A cpulist must not be compared numerically (it
	# would be evaluated as arithmetic), and the chosen CPU is cycled
	# only when it exposes an "online" control file.
	if [[ "$offline_cpus" != "0" &&
	      -f "$SYSFS/devices/system/cpu/cpu$offline_max/online" ]]; then
		echo -e "\t offline to online to offline: cpu $offline_max"
		online_cpu_expect_success $offline_max
		offline_cpu_expect_success $offline_max
	fi
	exit 0
else
	echo "Full scope test: all hotplug cpus"
	echo -e "\t online all offline cpus"
	echo -e "\t offline all online cpus"
	echo -e "\t online all offline cpus"
fi
#
# Online all hot-pluggable CPUs
#
for cpu in `hotplaggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
#
# Offline all hot-pluggable CPUs
#
for cpu in `hotpluggable_online_cpus`; do
offline_cpu_expect_success $cpu
done
#
# Online all hot-pluggable CPUs again
#
for cpu in `hotplaggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
#
# Test with cpu notifier error injection
#
DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
prerequisite_extra()
{
msg="skip extra tests:"
/sbin/modprobe -q -r cpu-notifier-error-inject
/sbin/modprobe -q cpu-notifier-error-inject priority=$priority
if [ ! -d "$DEBUGFS" ]; then
echo $msg debugfs is not mounted >&2
exit 0
fi
if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
echo $msg cpu-notifier-error-inject module is not available >&2
exit 0
fi
}
prerequisite_extra
#
# Offline all hot-pluggable CPUs
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
for cpu in `hotpluggable_online_cpus`; do
offline_cpu_expect_success $cpu
done
#
# Test CPU hot-add error handling (offline => online)
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
for cpu in `hotplaggable_offline_cpus`; do
online_cpu_expect_fail $cpu
done
#
# Online all hot-pluggable CPUs
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
for cpu in `hotplaggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
#
# Test CPU hot-remove error handling (online => offline)
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
for cpu in `hotpluggable_online_cpus`; do
offline_cpu_expect_fail $cpu
done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
/sbin/modprobe -q -r cpu-notifier-error-inject

View file

@ -0,0 +1,12 @@
CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall
test_objs = open-unlink create-read
all: $(test_objs)
run_tests: all
@/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
clean:
rm -f $(test_objs)

View file

@ -0,0 +1,38 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/*
 * Create the file named by argv[1] and verify that reading from the
 * freshly created file returns end-of-file.
 *
 * Returns EXIT_SUCCESS when read() returns 0.  Returns EXIT_FAILURE on a
 * usage error, when the file cannot be opened, or when read() returns
 * anything other than 0.
 */
int main(int argc, char **argv)
{
	const char *path;
	char buf[4];
	int fd;
	int ret = EXIT_FAILURE;
	ssize_t rc;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return EXIT_FAILURE;
	}

	path = argv[1];

	/* create a test variable */
	fd = open(path, O_RDWR | O_CREAT, 0600);
	if (fd < 0) {
		perror("open(O_RDWR | O_CREAT)");
		return EXIT_FAILURE;
	}

	rc = read(fd, buf, sizeof(buf));
	if (rc < 0)
		/* A failing read is a different problem than stale data. */
		perror("read");
	else if (rc != 0)
		fprintf(stderr, "Reading a new var should return EOF\n");
	else
		ret = EXIT_SUCCESS;

	close(fd);

	return ret;
}

View file

@ -0,0 +1,198 @@
#!/bin/bash
efivarfs_mount=/sys/firmware/efi/efivars
test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
check_prereqs()
{
local msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
exit 0
fi
if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
echo $msg efivarfs is not mounted on $efivarfs_mount >&2
exit 0
fi
}
# Run a single test and print its verdict.
#
# $1 - name of a shell function defined in this script, or of an
#      executable in the current directory.
#
# The test runs in a subshell, so an "exit" inside it only ends the test.
# A non-zero status prints [FAIL] and sets the global rc to 1; otherwise
# [PASS] is printed.
run_test()
{
	local test="$1"
	local status

	echo "--------------------"
	echo "running $test"
	echo "--------------------"

	if [ "$(type -t $test)" = 'function' ]; then
		( $test )
		status=$?
	else
		( ./$test )
		status=$?
	fi

	if [ $status -eq 0 ]; then
		echo " [PASS]"
	else
		echo " [FAIL]"
		rc=1
	fi
}
test_create()
{
local attrs='\x07\x00\x00\x00'
local file=$efivarfs_mount/$FUNCNAME-$test_guid
printf "$attrs\x00" > $file
if [ ! -e $file ]; then
echo "$file couldn't be created" >&2
exit 1
fi
if [ $(stat -c %s $file) -ne 5 ]; then
echo "$file has invalid size" >&2
exit 1
fi
}
test_create_empty()
{
local file=$efivarfs_mount/$FUNCNAME-$test_guid
: > $file
if [ ! -e $file ]; then
echo "$file can not be created without writing" >&2
exit 1
fi
}
test_create_read()
{
local file=$efivarfs_mount/$FUNCNAME-$test_guid
./create-read $file
}
test_delete()
{
local attrs='\x07\x00\x00\x00'
local file=$efivarfs_mount/$FUNCNAME-$test_guid
printf "$attrs\x00" > $file
if [ ! -e $file ]; then
echo "$file couldn't be created" >&2
exit 1
fi
rm $file
if [ -e $file ]; then
echo "$file couldn't be deleted" >&2
exit 1
fi
}
# test that we can remove a variable by issuing a write with only
# attributes specified
test_zero_size_delete()
{
local attrs='\x07\x00\x00\x00'
local file=$efivarfs_mount/$FUNCNAME-$test_guid
printf "$attrs\x00" > $file
if [ ! -e $file ]; then
echo "$file does not exist" >&2
exit 1
fi
printf "$attrs" > $file
if [ -e $file ]; then
echo "$file should have been deleted" >&2
exit 1
fi
}
test_open_unlink()
{
local file=$efivarfs_mount/$FUNCNAME-$test_guid
./open-unlink $file
}
# test that we can create a range of filenames
test_valid_filenames()
{
local attrs='\x07\x00\x00\x00'
local ret=0
local file_list="abc dump-type0-11-1-1362436005 1234 -"
for f in $file_list; do
local file=$efivarfs_mount/$f-$test_guid
printf "$attrs\x00" > $file
if [ ! -e $file ]; then
echo "$file could not be created" >&2
ret=1
else
rm $file
fi
done
exit $ret
}
test_invalid_filenames()
{
local attrs='\x07\x00\x00\x00'
local ret=0
local file_list="
-1234-1234-1234-123456789abc
foo
foo-bar
-foo-
foo-barbazba-foob-foob-foob-foobarbazfoo
foo-------------------------------------
-12345678-1234-1234-1234-123456789abc
a-12345678=1234-1234-1234-123456789abc
a-12345678-1234=1234-1234-123456789abc
a-12345678-1234-1234=1234-123456789abc
a-12345678-1234-1234-1234=123456789abc
1112345678-1234-1234-1234-123456789abc"
for f in $file_list; do
local file=$efivarfs_mount/$f
printf "$attrs\x00" 2>/dev/null > $file
if [ -e $file ]; then
echo "Creating $file should have failed" >&2
rm $file
ret=1
fi
done
exit $ret
}
check_prereqs
rc=0
run_test test_create
run_test test_create_empty
run_test test_create_read
run_test test_delete
run_test test_zero_size_delete
run_test test_open_unlink
run_test test_valid_filenames
run_test test_invalid_filenames
exit $rc

View file

@ -0,0 +1,63 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * Create the variable file named by argv[1], reopen it read-only, unlink
 * it while it is still open, and verify that a read on the open
 * descriptor returns no data.
 *
 * Returns EXIT_SUCCESS when the read after unlink yields no data and
 * EXIT_FAILURE on a usage error, on any failing system call, or when data
 * could still be read.
 */
int main(int argc, char **argv)
{
	const char *path;
	/*
	 * Four attribute bytes followed by one byte of data.
	 * attributes: EFI_VARIABLE_NON_VOLATILE |
	 *             EFI_VARIABLE_BOOTSERVICE_ACCESS |
	 *             EFI_VARIABLE_RUNTIME_ACCESS
	 * The value 0x7 is spelled out byte by byte, least significant byte
	 * first (the sequence efivarfs.sh writes as '\x07\x00\x00\x00'), so
	 * no uint32_t store is made through a char buffer.
	 */
	char buf[5] = { 0x07, 0x00, 0x00, 0x00, 0x00 };
	int fd;
	ssize_t rc;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return EXIT_FAILURE;
	}

	path = argv[1];

	/* create a test variable; O_CREAT requires the mode argument */
	fd = open(path, O_WRONLY | O_CREAT, 0600);
	if (fd < 0) {
		perror("open(O_WRONLY)");
		return EXIT_FAILURE;
	}

	rc = write(fd, buf, sizeof(buf));
	if (rc != (ssize_t)sizeof(buf)) {
		perror("write");
		close(fd);
		return EXIT_FAILURE;
	}

	close(fd);

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	if (unlink(path) < 0) {
		perror("unlink");
		close(fd);
		return EXIT_FAILURE;
	}

	rc = read(fd, buf, sizeof(buf));
	close(fd);
	if (rc > 0) {
		fprintf(stderr, "reading from an unlinked variable "
			"shouldn't be possible\n");
		return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}

View file

@ -0,0 +1,27 @@
# Makefile for firmware loading selftests
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
fw_filesystem:
@if /bin/sh ./fw_filesystem.sh ; then \
echo "fw_filesystem: ok"; \
else \
echo "fw_filesystem: [FAIL]"; \
exit 1; \
fi
fw_userhelper:
@if /bin/sh ./fw_userhelper.sh ; then \
echo "fw_userhelper: ok"; \
else \
echo "fw_userhelper: [FAIL]"; \
exit 1; \
fi
run_tests: all fw_filesystem fw_userhelper
# Nothing to clean up.
clean:
.PHONY: all clean run_tests fw_filesystem fw_userhelper

View file

@ -0,0 +1,62 @@
#!/bin/sh
# This validates that the kernel will load firmware out of its list of
# firmware locations on disk. Since the user helper does similar work,
# we reset the custom load directory to a location the user helper doesn't
# know so we can be sure we're not accidentally testing the user helper.
set -e
modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path)
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
# Cleanup handler, run from the EXIT trap: put back the firmware timeout
# and the firmware search path saved at the start of the script, then
# remove the temporary firmware file and its directory.
test_finish()
{
	echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
	# The saved search path is held in OLD_FWPATH
	echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
	rm -f "$FW"
	rmdir "$FWPATH"
}
trap "test_finish" EXIT
# Turn down the timeout so failures don't take so long.
echo 1 >/sys/class/firmware/timeout
# Set the kernel search path.
echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
# This is an unlikely real-world firmware content. :)
echo "ABCD0123" >"$FW"
NAME=$(basename "$FW")
# Request a firmware that doesn't exist, it should fail.
echo -n "nope-$NAME" >"$DIR"/trigger_request
if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
else
echo "$0: timeout works"
fi
# This should succeed via kernel load or will fail after 1 second after
# being handed over to the user helper, which won't find the fw either.
if ! echo -n "$NAME" >"$DIR"/trigger_request ; then
echo "$0: could not trigger request" >&2
exit 1
fi
# Verify the contents are what we expect.
if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not loaded" >&2
exit 1
else
echo "$0: filesystem loading works"
fi
exit 0

View file

@ -0,0 +1,89 @@
#!/bin/sh
# This validates that the kernel will fall back to using the user helper
# to load firmware it can't find on disk itself. We must request a firmware
# that the kernel won't find, and any installed helper (e.g. udev) also
# won't find so that we can do the load ourself manually.
set -e
modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
test_finish()
{
echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
rm -f "$FW"
rmdir "$FWPATH"
}
# Answer a firmware request by hand.
#
# $1 - firmware name to request through trigger_request
# $2 - file whose contents are fed to the kernel as the firmware data
#
# The request is started in the background, then the script waits for the
# per-request "loading" file to appear and pushes the data through the
# loading/data files.  Exits with status 1 when the interface does not
# appear after 10 polls of 0.1 second.
load_fw()
{
local name="$1"
local file="$2"
# This will block until our load (below) has finished.
echo -n "$name" >"$DIR"/trigger_request &
# Give kernel a chance to react.
local timeout=10
while [ ! -e "$DIR"/"$name"/loading ]; do
sleep 0.1
timeout=$(( $timeout - 1 ))
if [ "$timeout" -eq 0 ]; then
echo "$0: firmware interface never appeared" >&2
exit 1
fi
done
# Write 1 to start the transfer, copy the data, write 0 to finish it.
echo 1 >"$DIR"/"$name"/loading
cat "$file" >"$DIR"/"$name"/data
echo 0 >"$DIR"/"$name"/loading
# Wait for request to finish.
wait
}
trap "test_finish" EXIT
# This is an unlikely real-world firmware content. :)
echo "ABCD0123" >"$FW"
NAME=$(basename "$FW")
# Test failure when doing nothing (timeout works).
echo 1 >/sys/class/firmware/timeout
echo -n "$NAME" >"$DIR"/trigger_request
if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
else
echo "$0: timeout works"
fi
# Put timeout high enough for us to do work but not so long that failures
# slow down this test too much.
echo 4 >/sys/class/firmware/timeout
# Load this script instead of the desired firmware.
load_fw "$NAME" "$0"
if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
else
echo "$0: firmware comparison works"
fi
# Do a proper load, which should work correctly.
load_fw "$NAME" "$FW"
if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not loaded" >&2
exit 1
else
echo "$0: user helper firmware loading works"
fi
exit 0

View file

@ -0,0 +1,7 @@
all:
run_tests:
@/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
clean:
rm -rf logs/*

View file

@ -0,0 +1,82 @@
Linux Ftrace Testcases
This is a collection of testcases for ftrace tracing feature in the Linux
kernel. Since ftrace exports interfaces via the debugfs, we just need
shell scripts for testing. Feel free to add new test cases.
Running the ftrace testcases
============================
First, you need to be the root user to run this script.
To run all testcases:
$ sudo ./ftracetest
To run specific testcases:
# ./ftracetest test.d/basic3.tc
Or you can also run testcases under given directory:
# ./ftracetest test.d/kprobe/
Contributing new testcases
==========================
Copy test.d/template to your testcase (whose filename must have *.tc
extension) and rewrite the test description line.
* The working directory of the script is <debugfs>/tracing/.
* Take care with side effects as the tests are run with root privilege.
* The tests should not run for a long period of time (more than 1 min.)
These are to be unit tests.
* You can add a directory for your testcases under test.d/ if needed.
* The test cases should run on dash (busybox shell) for testing on
minimal cross-build environments.
* Note that the tests are run with "set -e" (errexit) option. If any
command fails, the test will be terminated immediately.
* The tests can return some result codes instead of pass or fail by
using exit_unresolved, exit_untested, exit_unsupported and exit_xfail.
Result code
===========
Ftracetest supports the following result codes.
* PASS: The test succeeded as expected. The test which exits with 0 is
counted as passed test.
* FAIL: The test failed, but was expected to succeed. The test which exits
with !0 is counted as failed test.
* UNRESOLVED: The test produced unclear or intermediate results,
for example, the test was interrupted,
or the test depends on a previous test which failed,
or the test was set up incorrectly.
A test in any of the above situations must call exit_unresolved.
* UNTESTED: The test was not run, currently just a placeholder.
In this case, the test must call exit_untested.
* UNSUPPORTED: The test failed because of lack of feature.
In this case, the test must call exit_unsupported.
* XFAIL: The test failed, and was expected to fail.
To return XFAIL, call exit_xfail from the test.
There are some sample test scripts for result code under samples/.
You can also run samples as below:
# ./ftracetest samples/
TODO
====
* Fancy colored output :)

View file

@ -0,0 +1,253 @@
#!/bin/sh
# ftracetest - Ftrace test shell scripts
#
# Copyright (C) Hitachi Ltd., 2014
# Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
#
# Released under the terms of the GPL v2.
# usage ERRNO [MESSAGE]
# Print the optional MESSAGE followed by the help text, then exit the
# script with status ERRNO.
usage() { # errno [message]
  # Quote the message so it is printed verbatim: unquoted, it would be
  # word-split and glob-expanded (e.g. an invalid argument containing '*').
  [ "$2" ] && echo "$2"
  echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
  echo " Options:"
  echo " -h|--help Show help message"
  echo " -k|--keep Keep passed test logs"
  echo " -d|--debug Debug mode (trace all shell commands)"
  exit $1
}
# errexit MESSAGE
# Report a fatal error on stderr and terminate the script with status 1.
errexit() { # message
  echo "Error: $1" >&2
  exit 1
}
# Ensuring user privilege
# The tests write to debugfs, so refuse to run unless the effective uid is 0.
if [ $(id -u) -ne 0 ]; then
  errexit "this must be run by root user"
fi
# Utilities
# absdir FILE_PATH
# Print the absolute path of the directory containing FILE_PATH. The cd
# happens in a subshell, so the caller's working directory is untouched.
absdir() { # file_path
  (cd $(dirname $1); pwd)
}
# abspath FILE_PATH
# Print FILE_PATH as an absolute path (absolute directory + base name).
abspath() {
  echo $(absdir $1)/$(basename $1)
}
# find_testcases DIRECTORY
# Print, on a single space-separated line, every *.tc file found below
# DIRECTORY.
find_testcases() { #directory
  echo $(find $1 -name \*.tc)
}
# parse_opts ARGS...
# Handle the command line: option flags set KEEP_LOG / DEBUG, "*.tc"
# arguments name individual testcases, and any other argument must be a
# directory whose testcases are added. If at least one testcase was
# selected, TEST_CASES is replaced by the selection.
parse_opts() { # opts
  local OPT_TEST_CASES=
  local OPT_TEST_DIR=

  while [ "$1" ]; do
    case "$1" in
    -h|--help)
      usage 0
      ;;
    -k|--keep)
      KEEP_LOG=1
      ;;
    -d|--debug)
      DEBUG=1
      ;;
    *.tc)
      # usage() exits, so execution only continues for an existing file.
      [ -f "$1" ] || usage 1 "$1 is not a testcase"
      OPT_TEST_CASES="$OPT_TEST_CASES $(abspath $1)"
      ;;
    *)
      [ -d "$1" ] || usage 1 "Invalid option ($1)"
      OPT_TEST_DIR=$(abspath $1)
      OPT_TEST_CASES="$OPT_TEST_CASES $(find_testcases $OPT_TEST_DIR)"
      ;;
    esac
    shift 1
  done

  if [ "$OPT_TEST_CASES" ]; then
    TEST_CASES=$OPT_TEST_CASES
  fi
}
# Parameters
# Mount point of debugfs (first match in /proc/mounts) and the tracing
# directory below it.
DEBUGFS_DIR=$(grep debugfs /proc/mounts | cut -f2 -d' ' | head -1)
TRACING_DIR=$DEBUGFS_DIR/tracing

# Locations relative to this script; every run logs into a directory named
# after the start time.
TOP_DIR=$(absdir $0)
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=$(find_testcases $TEST_DIR)
LOG_DIR=$TOP_DIR/logs/$(date +%Y%m%d-%H%M%S)/

# Defaults, possibly overridden by the command line.
KEEP_LOG=0
DEBUG=0

# Parse command-line options
parse_opts $*

[ $DEBUG -ne 0 ] && set -x

# Verify parameters
if [ -z "$DEBUGFS_DIR" ] || [ ! -d "$TRACING_DIR" ]; then
  errexit "No ftrace directory found"
fi

# Preparing logs
LOG_FILE=$LOG_DIR/ftracetest.log
mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
date > $LOG_FILE
# prlog [ECHO-ARGS...]
# Echo the arguments to stdout and append the same text to the run log.
prlog() { # messages
  echo "$@" |
    tee -a $LOG_FILE
}
# catlog FILE
# Dump FILE to stdout and append its contents to the run log.
catlog() { #file
  cat $1 |
    tee -a $LOG_FILE
}
prlog "=== Ftrace unit tests ==="
# Testcase management
# Test result codes - Dejagnu extended code
# These numbers are matched against the result in eval_result() and are
# added to SIG_BASE to pick the signal a testcase uses to report a result.
PASS=0 # The test succeeded.
FAIL=1 # The test failed, but was expected to succeed.
UNRESOLVED=2 # The test produced indeterminate results. (e.g. interrupted)
UNTESTED=3 # The test was not run, currently just a placeholder.
UNSUPPORTED=4 # The test failed because of lack of feature.
XFAIL=5 # The test failed, and was expected to fail.
# Accumulations
# Each list collects the sequence numbers (CASENO) of the testcases that
# ended with the corresponding result; they are counted in the summary.
PASSED_CASES=
FAILED_CASES=
UNRESOLVED_CASES=
UNTESTED_CASES=
UNSUPPORTED_CASES=
XFAILED_CASES=
UNDEFINED_CASES=
# Becomes 1 as soon as one testcase ends with an unexpected result; used as
# the exit status of the whole run.
TOTAL_RESULT=0
# Sequence number of the testcase currently being run.
CASENO=0
# testcase TESTFILE
# Advance the testcase counter and print "[N] <description>" without a
# trailing newline; the description is taken from the "# description:"
# line of TESTFILE.
testcase() { # testfile
  local desc

  CASENO=$((CASENO+1))
  desc=$(grep "^#[ \t]*description:" $1 | cut -f2 -d:)
  prlog -n "[$CASENO]"$desc
}
# eval_result RETVAL SIGVAL
# Classify the outcome of the testcase that just ran, log its tag and
# record CASENO in the matching list. SIGVAL is the result reported through
# a signal (0 if none); in that case a non-zero RETVAL means FAIL.
# Returns 0 when the outcome is acceptable (PASS, UNTESTED, XFAIL) and 1
# when it has to be reported.
eval_result() { # retval sigval
  local code=$2
  local rc=1

  if [ $2 -eq 0 ] && [ $1 -ne 0 ]; then
    code=$FAIL
  fi

  case $code in
  $PASS)
    prlog " [PASS]"
    PASSED_CASES="$PASSED_CASES $CASENO"
    rc=0
    ;;
  $FAIL)
    # this is a bug.
    prlog " [FAIL]"
    FAILED_CASES="$FAILED_CASES $CASENO"
    ;;
  $UNRESOLVED)
    # this is a kind of bug.. something happened.
    prlog " [UNRESOLVED]"
    UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
    ;;
  $UNTESTED)
    prlog " [UNTESTED]"
    UNTESTED_CASES="$UNTESTED_CASES $CASENO"
    rc=0
    ;;
  $UNSUPPORTED)
    # this is not a bug, but the result should be reported.
    prlog " [UNSUPPORTED]"
    UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
    ;;
  $XFAIL)
    prlog " [XFAIL]"
    XFAILED_CASES="$XFAILED_CASES $CASENO"
    rc=0
    ;;
  *)
    # this must be a test bug
    prlog " [UNDEFINED]"
    UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
    ;;
  esac

  return $rc
}
# Signal handling for result codes
# A testcase runs in a subshell, so it reports a special result by sending
# a signal to the main script (SIG_PID). The signal number is SIG_BASE plus
# the result code; the trap handlers below store the code in SIG_RESULT.
SIG_RESULT=
SIG_BASE=36 # Use realtime signals
SIG_PID=$$

SIG_UNRESOLVED=$((SIG_BASE + UNRESOLVED))
SIG_UNTESTED=$((SIG_BASE + UNTESTED))
SIG_UNSUPPORTED=$((SIG_BASE + UNSUPPORTED))
SIG_XFAIL=$((SIG_BASE + XFAIL))

# Helpers called from testcases: signal the result, then leave the test.
exit_unresolved () {
  kill -s $SIG_UNRESOLVED $SIG_PID
  exit 0
}

exit_untested () {
  kill -s $SIG_UNTESTED $SIG_PID
  exit 0
}

exit_unsupported () {
  kill -s $SIG_UNSUPPORTED $SIG_PID
  exit 0
}

exit_xfail () {
  kill -s $SIG_XFAIL $SIG_PID
  exit 0
}

# Handlers in the main script: remember which result was signalled.
trap 'SIG_RESULT=$UNRESOLVED' $SIG_UNRESOLVED
trap 'SIG_RESULT=$UNTESTED' $SIG_UNTESTED
trap 'SIG_RESULT=$UNSUPPORTED' $SIG_UNSUPPORTED
trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
# Run one test case
# run_test TESTFILE
# Source TESTFILE in a subshell whose working directory is the tracing
# directory, with errexit and command tracing enabled and all output
# captured in a per-test log. The log is shown (and the run marked as
# failed) when eval_result reports an unacceptable outcome; otherwise it is
# removed unless KEEP_LOG is set.
run_test() { # testfile
  local testname=$(basename $1)
  local testlog=$(mktemp --tmpdir=$LOG_DIR ${testname}-XXXXXX.log)

  testcase $1
  echo "execute: "$1 > $testlog
  SIG_RESULT=0

  # setup PID and PPID, $$ is not updated.
  (
    cd $TRACING_DIR
    read PID _ < /proc/self/stat
    set -e
    set -x
    . $1
  ) >> $testlog 2>&1
  eval_result $? $SIG_RESULT

  if [ $? -ne 0 ]; then
    catlog $testlog
    TOTAL_RESULT=1
  else
    # Remove test log if the test was done as it was expected.
    [ $KEEP_LOG -eq 0 ] && rm $testlog
  fi
}
# Main loop
# Run every selected testcase in turn.
for t in $TEST_CASES; do
  run_test $t
done

# Summary: each list holds one word per testcase, so wc -w counts them.
prlog ""
prlog "# of passed: " $(echo $PASSED_CASES | wc -w)
prlog "# of failed: " $(echo $FAILED_CASES | wc -w)
prlog "# of unresolved: " $(echo $UNRESOLVED_CASES | wc -w)
prlog "# of untested: " $(echo $UNTESTED_CASES | wc -w)
prlog "# of unsupported: " $(echo $UNSUPPORTED_CASES | wc -w)
prlog "# of xfailed: " $(echo $XFAILED_CASES | wc -w)
prlog "# of undefined(test bug): " $(echo $UNDEFINED_CASES | wc -w)

# if no error, return 0
exit $TOTAL_RESULT

View file

@ -0,0 +1,4 @@
#!/bin/sh
# description: failure-case example
# Sample: reading a missing file fails; because testcases run with
# "set -e" the script stops there with a non-zero status (counted as FAIL).
cat non-exist-file
echo "this is not executed"

View file

@ -0,0 +1,3 @@
#!/bin/sh
# description: pass-case example
# Sample: the testcase is sourced by ftracetest, so "return 0" ends it
# with status 0 (counted as PASS).
return 0

View file

@ -0,0 +1,4 @@
#!/bin/sh
# description: unresolved-case example
# Sample: simulate an interrupted test. $PID is the pid of the test
# subshell (set up by ftracetest); sending it SIGINT runs the trap, which
# reports UNRESOLVED.
trap exit_unresolved INT
kill -INT $PID

View file

@ -0,0 +1,3 @@
#!/bin/sh
# description: unsupported-case example
# Sample: report the UNSUPPORTED result and leave the test.
exit_unsupported

View file

@ -0,0 +1,3 @@
#!/bin/sh
# description: untested-case example
# Sample: report the UNTESTED result and leave the test.
exit_untested

View file

@ -0,0 +1,3 @@
#!/bin/sh
# description: xfail-case example
# Sample: the cat is expected to fail; its failure is turned into the
# XFAIL result instead of aborting the test through errexit.
cat non-exist-file || exit_xfail

View file

@ -0,0 +1,3 @@
#!/bin/sh
# description: Basic trace file check
# Runs in the tracing directory: pass only if all four control files exist.
test -f README -a -f trace -a -f tracing_on -a -f trace_pipe

View file

@ -0,0 +1,7 @@
#!/bin/sh
# description: Basic test for tracers
# Select every tracer listed in available_tracers once; any failing write
# aborts the test through errexit.
test -f available_tracers
for t in `cat available_tracers`; do
echo $t > current_tracer
done
# Leave the system with the "nop" tracer selected.
echo nop > current_tracer

View file

@ -0,0 +1,8 @@
#!/bin/sh
# description: Basic trace clock test
# trace_clock lists all clocks with the active one in brackets. Strip the
# brackets to get the plain names, select each clock, and check that it is
# then the bracketed (active) entry.
test -f trace_clock
for c in `cat trace_clock | tr -d \[\]`; do
echo $c > trace_clock
grep '\['$c'\]' trace_clock
done
# Switch back to the "local" clock when done.
echo local > trace_clock

View file

@ -0,0 +1,11 @@
#!/bin/sh
# description: Kprobe dynamic event - adding and removing
[ -f kprobe_events ] || exit_unsupported # this is configurable
# Start from a clean state: all events disabled, no kprobe events defined.
echo 0 > events/enable
echo > kprobe_events
# Define a probe on do_fork and check that it shows up both in the event
# list and as an event directory.
echo p:myevent do_fork > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
# Writing an empty line clears the probe list again.
echo > kprobe_events

View file

@ -0,0 +1,13 @@
#!/bin/sh
# description: Kprobe dynamic event - busy event check
[ -f kprobe_events ] || exit_unsupported
# Start from a clean state, then define and enable one probe.
echo 0 > events/enable
echo > kprobe_events
echo p:myevent do_fork > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
# Clearing the list while the event is enabled has to be refused; a failed
# command on the left of && does not trigger errexit, so the test continues.
echo > kprobe_events && exit 1 # this must fail
echo 0 > events/kprobes/myevent/enable
echo > kprobe_events # this must succeed

View file

@ -0,0 +1,16 @@
#!/bin/sh
# description: Kprobe dynamic event with arguments
[ -f kprobe_events ] || exit_unsupported # this is configurable
# Start from a clean state.
echo 0 > events/enable
echo > kprobe_events
# Define a probe on do_fork that records three stack-based arguments.
echo 'p:testprobe do_fork $stack $stack0 +0($stack)' > kprobe_events
grep testprobe kprobe_events
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
# Run a command in a subshell while the probe is enabled.
( echo "forked")
echo 0 > events/kprobes/testprobe/enable
# Remove only this probe; its event directory must be gone afterwards.
echo "-:testprobe" >> kprobe_events
test -d events/kprobes/testprobe && exit 1 || exit 0

View file

@ -0,0 +1,15 @@
#!/bin/sh
# description: Kretprobe dynamic event with arguments
[ -f kprobe_events ] || exit_unsupported # this is configurable
# Start from a clean state.
echo 0 > events/enable
echo > kprobe_events
# Define a return probe on do_fork that records the return value.
echo 'r:testprobe2 do_fork $retval' > kprobe_events
grep testprobe2 kprobe_events
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
# Run a command in a subshell while the probe is enabled.
( echo "forked")
echo 0 > events/kprobes/testprobe2/enable
# Remove only this probe; its event directory must be gone afterwards.
echo '-:testprobe2' >> kprobe_events
test -d events/kprobes/testprobe2 && exit 1 || exit 0

View file

@ -0,0 +1,9 @@
#!/bin/sh
# description: %HERE DESCRIBE WHAT THIS DOES%
# The testcase file name must end with the ".tc" extension.
# Note that all tests are run with the "errexit" option.
exit 0 # Return 0 if the test is passed, otherwise return !0
# If the test could not run because of lack of feature, call exit_unsupported
# If the test returned unclear results, call exit_unresolved
# If the test is a dummy, or a placeholder, call exit_untested

View file

@ -0,0 +1,25 @@
# Normalise the machine name: any i?86 becomes i386; ARCH may also be
# preset by the caller.
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
# Both x86 flavours are folded into ARCH=x86, each with its own defines.
ifeq ($(ARCH),i386)
ARCH := x86
CFLAGS := -DCONFIG_X86_32 -D__i386__
endif
ifeq ($(ARCH),x86_64)
ARCH := x86
CFLAGS := -DCONFIG_X86_64 -D__x86_64__
endif
# Use the exported kernel headers from the top of the source tree.
CFLAGS += -I../../../../usr/include/
# The test is only built on x86; other targets just print a notice.
all:
ifeq ($(ARCH),x86)
gcc $(CFLAGS) msgque.c -o msgque_test
else
echo "Not an x86 target, can't build msgque selftest"
endif
# NOTE(review): on a non-x86 target nothing is built, so this recipe would
# fail to find ./msgque_test.
run_tests: all
./msgque_test
clean:
rm -fr ./msgque_test

View file

@ -0,0 +1,252 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <linux/msg.h>
#include <fcntl.h>
#define MAX_MSG_SIZE 32
/*
 * One saved message. The address of mtype is what gets passed to
 * msgsnd()/msgrcv() as the message buffer, so mtype must be immediately
 * followed by mtext. msize holds the payload length in bytes as returned
 * by msgrcv().
 */
struct msg1 {
int msize;
long mtype;
char mtext[MAX_MSG_SIZE];
};
#define TEST_STRING "Test sysv5 msg"
#define MSG_TYPE 1
#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
#define ANOTHER_MSG_TYPE 26538
/*
 * State of the queue under test: how it was created (key, mode), the id
 * the kernel assigned, and the snapshot taken by dump_queue() (qbytes,
 * qnum and the qnum entries of messages).
 */
struct msgque_data {
key_t key;
int msq_id;
int qbytes;
int qnum;
int mode;
struct msg1 *messages;
};
/*
 * restore_queue() - recreate a queue from a snapshot.
 * @msgque: snapshot filled in by dump_queue()
 *
 * Writes the wanted id to /proc/sys/kernel/msg_next_id, creates the queue
 * with the saved key and mode, verifies that it received the saved id and
 * re-sends all saved messages. The queue is removed again if anything
 * fails after its creation.
 *
 * Return: 0 on success, a negative errno value on failure.
 */
int restore_queue(struct msgque_data *msgque)
{
	int fd, ret, id, i;
	char buf[32];
	size_t len;
	ssize_t written;

	fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
	if (fd == -1) {
		/* Save errno first: printf() is allowed to change it. */
		ret = -errno;
		printf("Failed to open /proc/sys/kernel/msg_next_id\n");
		return ret;
	}

	len = (size_t)snprintf(buf, sizeof(buf), "%d", msgque->msq_id);
	written = write(fd, buf, len);
	if (written < 0)
		ret = -errno;
	else if ((size_t)written != len)
		ret = -EIO;	/* short write: errno is not meaningful */
	else
		ret = 0;
	/* The descriptor is only needed for this single write. */
	close(fd);
	if (ret) {
		printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
		return ret;
	}

	id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
	if (id == -1) {
		ret = -errno;
		printf("Failed to create queue\n");
		return ret;
	}

	if (id != msgque->msq_id) {
		printf("Restored queue has wrong id (%d instead of %d)\n",
		       id, msgque->msq_id);
		ret = -EFAULT;
		goto destroy;
	}

	for (i = 0; i < msgque->qnum; i++) {
		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
			   msgque->messages[i].msize, IPC_NOWAIT) != 0) {
			ret = -errno;
			printf("msgsnd failed (%m)\n");
			goto destroy;
		}
	}
	return 0;

destroy:
	if (msgctl(id, IPC_RMID, 0))
		printf("Failed to destroy queue: %d\n", -errno);
	return ret;
}
/*
 * check_and_destroy_queue() - compare the queue against the snapshot.
 * @msgque: queue id plus the expected messages
 *
 * Drains the queue and checks that every received message matches the
 * saved one in size, type and content, and that the number of messages is
 * exactly msgque->qnum. The queue is removed in every case.
 *
 * Return: 0 if the queue matched and was removed, a negative errno value
 * otherwise.
 */
int check_and_destroy_queue(struct msgque_data *msgque)
{
	struct msg1 message;
	int cnt = 0, ret;

	while (1) {
		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
			     0, IPC_NOWAIT);
		if (ret < 0) {
			if (errno == ENOMSG)
				break;
			/* Save errno first: printf() is allowed to change it. */
			ret = -errno;
			printf("Failed to read IPC message: %m\n");
			goto err;
		}
		/*
		 * More messages than were saved: stop before indexing past
		 * the end of msgque->messages.
		 */
		if (cnt >= msgque->qnum) {
			printf("Wrong message number\n");
			ret = -EINVAL;
			goto err;
		}
		if (ret != msgque->messages[cnt].msize) {
			printf("Wrong message size: %d (expected %d)\n", ret,
			       msgque->messages[cnt].msize);
			ret = -EINVAL;
			goto err;
		}
		if (message.mtype != msgque->messages[cnt].mtype) {
			printf("Wrong message type\n");
			ret = -EINVAL;
			goto err;
		}
		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
			printf("Wrong message content\n");
			ret = -EINVAL;
			goto err;
		}
		cnt++;
	}

	if (cnt != msgque->qnum) {
		printf("Wrong message number\n");
		ret = -EINVAL;
		goto err;
	}

	ret = 0;
err:
	if (msgctl(msgque->msq_id, IPC_RMID, 0)) {
		int rm_err = -errno;

		printf("Failed to destroy queue: %d\n", rm_err);
		return rm_err;
	}
	return ret;
}
/*
 * dump_queue() - take a snapshot of the queue.
 * @msgque: in: msq_id of the queue; out: qnum, mode, qbytes and messages
 *
 * Scans the first 256 kernel indices with MSG_STAT until the one whose
 * queue id equals msgque->msq_id is found, then copies all queued messages
 * with MSG_COPY, which leaves them in the queue.
 *
 * Return: 0 on success, a negative errno value on failure. On failure
 * msgque->messages is not left pointing at allocated memory by this call.
 */
int dump_queue(struct msgque_data *msgque)
{
	struct msqid64_ds ds;
	int kern_id;
	int i, ret;

	for (kern_id = 0; kern_id < 256; kern_id++) {
		ret = msgctl(kern_id, MSG_STAT, &ds);
		if (ret < 0) {
			/*
			 * errno values are positive; EINVAL here means the
			 * index refers to an unused slot, so keep scanning.
			 */
			if (errno == EINVAL)
				continue;
			ret = -errno;
			printf("Failed to get stats for IPC queue with id %d\n",
			       kern_id);
			return ret;
		}

		if (ret == msgque->msq_id)
			break;
	}

	/* Without a match, ds does not describe our queue. */
	if (kern_id == 256) {
		printf("Failed to find IPC queue with id %d\n",
		       msgque->msq_id);
		return -ENOENT;
	}

	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
	if (msgque->messages == NULL) {
		printf("Failed to get stats for IPC queue\n");
		return -ENOMEM;
	}

	msgque->qnum = ds.msg_qnum;
	msgque->mode = ds.msg_perm.mode;
	msgque->qbytes = ds.msg_qbytes;

	for (i = 0; i < msgque->qnum; i++) {
		/* With MSG_COPY the msgtyp argument is the message index. */
		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
			     MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
		if (ret < 0) {
			ret = -errno;
			printf("Failed to copy IPC message: %m (%d)\n", errno);
			free(msgque->messages);
			msgque->messages = NULL;
			return ret;
		}
		msgque->messages[i].msize = ret;
	}
	return 0;
}
int fill_msgque(struct msgque_data *msgque)
{
struct msg1 msgbuf;
msgbuf.mtype = MSG_TYPE;
memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
IPC_NOWAIT) != 0) {
printf("First message send failed (%m)\n");
return -errno;
};
msgbuf.mtype = ANOTHER_MSG_TYPE;
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
IPC_NOWAIT) != 0) {
printf("Second message send failed (%m)\n");
return -errno;
};
return 0;
}
int main(int argc, char **argv)
{
int msg, pid, err;
struct msgque_data msgque;
if (getuid() != 0) {
printf("Please run the test as root - Exiting.\n");
exit(1);
}
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
printf("Can't make key\n");
return -errno;
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
if (msgque.msq_id == -1) {
err = -errno;
printf("Can't create queue\n");
goto err_out;
}
err = fill_msgque(&msgque);
if (err) {
printf("Failed to fill queue\n");
goto err_destroy;
}
err = dump_queue(&msgque);
if (err) {
printf("Failed to dump queue\n");
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
printf("Failed to check and destroy queue\n");
goto err_out;
}
err = restore_queue(&msgque);
if (err) {
printf("Failed to restore queue\n");
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
printf("Failed to test queue\n");
goto err_out;
}
return 0;
err_destroy:
if (msgctl(msgque.msq_id, IPC_RMID, 0)) {
printf("Failed to destroy queue: %d\n", -errno);
return -errno;
}
err_out:
return err;
}

View file

@ -0,0 +1,28 @@
# Normalise the machine name: any i?86 becomes i386; ARCH may also be
# preset by the caller.
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
# Both x86 flavours are folded into ARCH=x86, each with its own defines.
ifeq ($(ARCH),i386)
ARCH := x86
CFLAGS := -DCONFIG_X86_32 -D__i386__
endif
ifeq ($(ARCH),x86_64)
ARCH := x86
CFLAGS := -DCONFIG_X86_64 -D__x86_64__
endif
# Header search path: generated and in-tree kernel headers.
CFLAGS += -I../../../../arch/x86/include/generated/
CFLAGS += -I../../../../include/
CFLAGS += -I../../../../usr/include/
CFLAGS += -I../../../../arch/x86/include/
# The test is only built on x86; other targets just print a notice.
all:
ifeq ($(ARCH),x86)
gcc $(CFLAGS) kcmp_test.c -o kcmp_test
else
echo "Not an x86 target, can't build kcmp selftest"
endif
# A non-zero exit of the test is reported as FAIL without failing make.
run_tests: all
@./kcmp_test || echo "kcmp_test: [FAIL]"
# kcmp-test-file is the scratch file the test creates in this directory.
clean:
$(RM) kcmp_test kcmp-test-file

View file

@ -0,0 +1,96 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <linux/unistd.h>
#include <linux/kcmp.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
/*
 * Thin wrapper around the raw kcmp system call; arguments and return
 * value are passed through syscall() unchanged.
 */
static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2)
{
	long rc = syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);

	return rc;
}
/*
 * kcmp selftest: the parent creates a file and forks; the child opens the
 * same file again, prints the result of every comparison type as an
 * example, and then checks two cases that must compare equal.
 *
 * Return: 0 if the child passed both checks, 1 otherwise.
 */
int main(int argc, char **argv)
{
	const char kpath[] = "kcmp-test-file";
	int pid1, pid2;
	int fd1, fd2;
	int status;

	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (fd1 < 0) {
		perror("Can't create file");
		exit(1);
	}
	pid1 = getpid();

	pid2 = fork();
	if (pid2 < 0) {
		perror("fork failed");
		exit(1);
	}

	if (!pid2) {
		int child_pid = getpid();
		/* Sticky: a later PASS must not hide an earlier FAIL. */
		int failed = 0;
		int ret;

		fd2 = open(kpath, O_RDWR, 0644);
		if (fd2 < 0) {
			perror("Can't open file");
			exit(1);
		}

		/* An example of output and arguments */
		printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
		       "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
		       "INV: %2ld\n",
		       pid1, child_pid,
		       sys_kcmp(pid1, child_pid, KCMP_FILE, fd1, fd2),
		       sys_kcmp(pid1, child_pid, KCMP_FILES, 0, 0),
		       sys_kcmp(pid1, child_pid, KCMP_VM, 0, 0),
		       sys_kcmp(pid1, child_pid, KCMP_FS, 0, 0),
		       sys_kcmp(pid1, child_pid, KCMP_SIGHAND, 0, 0),
		       sys_kcmp(pid1, child_pid, KCMP_IO, 0, 0),
		       sys_kcmp(pid1, child_pid, KCMP_SYSVSEM, 0, 0),

			/* This one should fail */
		       sys_kcmp(pid1, child_pid, KCMP_TYPES + 1, 0, 0));

		/* This one should return same fd */
		ret = sys_kcmp(pid1, child_pid, KCMP_FILE, fd1, fd1);
		if (ret) {
			printf("FAIL: 0 expected but %d returned (%s)\n",
			       ret, strerror(errno));
			failed = 1;
		} else
			printf("PASS: 0 returned as expected\n");

		/* Compare with self */
		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
		if (ret) {
			printf("FAIL: 0 expected but %d returned (%s)\n",
			       ret, strerror(errno));
			failed = 1;
		} else
			printf("PASS: 0 returned as expected\n");

		exit(failed);
	}

	/*
	 * waitpid() takes option flags as its third argument; P_ALL is an
	 * id type for waitid(). No options are needed here.
	 */
	if (waitpid(pid2, &status, 0) < 0) {
		perror("waitpid failed");
		exit(1);
	}

	/* Propagate the child's verdict so the caller can detect failure. */
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return 1;

	return 0;
}

View file

@ -0,0 +1,20 @@
# 64-bit file offsets, plus the in-tree (uapi) headers.
CFLAGS += -D_FILE_OFFSET_BITS=64
CFLAGS += -I../../../../include/uapi/
CFLAGS += -I../../../../include/

.PHONY: all run_tests build_fuse run_fuse clean

# Build the basic memfd test.
all:
	gcc $(CFLAGS) memfd_test.c -o memfd_test

# "all" has already built memfd_test, so it is not compiled a second time.
# A non-zero exit of the test is reported as FAIL without failing make.
run_tests: all
	@./memfd_test || echo "memfd_test: [FAIL]"

# Build the FUSE helper file-system and the GUP race test that uses it.
build_fuse:
	gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
	gcc $(CFLAGS) fuse_test.c -o fuse_test

run_fuse: build_fuse
	@./run_fuse_test.sh || echo "fuse_test: [FAIL]"

# Remove every binary the targets above can produce, fuse_mnt included.
clean:
	$(RM) memfd_test fuse_mnt fuse_test

View file

@ -0,0 +1,110 @@
/*
* memfd test file-system
* This file uses FUSE to create a dummy file-system with only one file /memfd.
* This file is read-only and takes 1s per read.
*
* This file-system is used by the memfd test-cases to force the kernel to pin
* pages during reads(). Due to the 1s delay of this file-system, this is a
* nice way to test race-conditions against get_user_pages() in the kernel.
*
* We use direct_io==1 to force the kernel to use direct-IO for this
* file-system.
*/
#define FUSE_USE_VERSION 26
#include <fuse.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
/* Fixed content and path of the single file served by this file-system. */
static const char memfd_content[] = "memfd-example-content";
static const char memfd_path[] = "/memfd";
/*
 * getattr handler: "/" is a directory (0755, two links), memfd_path is a
 * read-only regular file whose size is the length of memfd_content, and
 * every other path yields -ENOENT.
 */
static int memfd_getattr(const char *path, struct stat *st)
{
	memset(st, 0, sizeof(*st));

	if (strcmp(path, "/") == 0) {
		st->st_mode = S_IFDIR | 0755;
		st->st_nlink = 2;
		return 0;
	}

	if (strcmp(path, memfd_path) == 0) {
		st->st_mode = S_IFREG | 0444;
		st->st_nlink = 1;
		st->st_size = strlen(memfd_content);
		return 0;
	}

	return -ENOENT;
}
/*
 * readdir handler: only the root directory exists; it lists ".", ".." and
 * the memfd file (memfd_path without its leading slash).
 */
static int memfd_readdir(const char *path,
			 void *buf,
			 fuse_fill_dir_t filler,
			 off_t offset,
			 struct fuse_file_info *fi)
{
	const char *file_name = memfd_path + 1;

	if (strcmp(path, "/") != 0)
		return -ENOENT;

	filler(buf, ".", NULL, 0);
	filler(buf, "..", NULL, 0);
	filler(buf, file_name, NULL, 0);

	return 0;
}
/*
 * open handler: only memfd_path can be opened, and only read-only.
 * Direct I/O is forced on the handle.
 *
 * Return: 0 on success, -ENOENT for any other path, -EACCES when the file
 * is opened for writing.
 */
static int memfd_open(const char *path, struct fuse_file_info *fi)
{
	if (strcmp(path, memfd_path))
		return -ENOENT;

	/* Mask the access mode with O_ACCMODE rather than a bare 3. */
	if ((fi->flags & O_ACCMODE) != O_RDONLY)
		return -EACCES;

	/* force direct-IO */
	fi->direct_io = 1;

	return 0;
}
/*
 * read handler: sleeps for one second, then copies the requested part of
 * memfd_content into buf.
 *
 * Return: number of bytes copied (0 at or past the end of the content),
 * or -ENOENT for any path other than memfd_path.
 */
static int memfd_read(const char *path,
		      char *buf,
		      size_t size,
		      off_t offset,
		      struct fuse_file_info *fi)
{
	size_t total;

	if (strcmp(path, memfd_path) != 0)
		return -ENOENT;

	/* The delay is the point of this file-system (see file header). */
	sleep(1);

	total = strlen(memfd_content);
	if (!(offset < total))
		return 0;

	/* Clamp the request to what is left of the content. */
	if (offset + size > total)
		size = total - offset;
	memcpy(buf, memfd_content + offset, size);

	return size;
}
/* FUSE operation table: only the four handlers defined in this file. */
static struct fuse_operations memfd_ops = {
.getattr = memfd_getattr,
.readdir = memfd_readdir,
.open = memfd_open,
.read = memfd_read,
};
/*
 * Hand the command line and the operation table to fuse_main() and return
 * its result as the exit status.
 */
int main(int argc, char *argv[])
{
return fuse_main(argc, argv, &memfd_ops, NULL);
}

View file

@ -0,0 +1,311 @@
/*
* memfd GUP test-case
* This tests memfd interactions with get_user_pages(). We require the
* fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
* file-system delays _all_ reads by 1s and forces direct-IO. This means, any
* read() on files in that file-system will pin the receive-buffer pages for at
* least 1s via get_user_pages().
*
* We use this trick to race ADD_SEALS against a write on a memfd object. The
* ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
* the read() syscall with our memory-mapped memfd object as receive buffer to
* force the kernel to write into our memfd object.
*/
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/falloc.h>
#include <linux/fcntl.h>
#include <linux/memfd.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65535
/* Invoke the memfd_create system call directly through syscall(). */
static int sys_memfd_create(const char *name, unsigned int flags)
{
	int fd = syscall(__NR_memfd_create, name, flags);

	return fd;
}
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 * Aborts the program if either step fails; returns the new descriptor.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
/* Return the seals currently set on @fd; abort if they cannot be read. */
static __u64 mfd_assert_get_seals(int fd)
{
	long seals = fcntl(fd, F_GET_SEALS);

	if (seals < 0) {
		printf("GET_SEALS(%d) failed: %m\n", fd);
		abort();
	}

	return seals;
}
/* Abort unless the seals on @fd are exactly @seals. */
static void mfd_assert_has_seals(int fd, __u64 seals)
{
	__u64 current = mfd_assert_get_seals(fd);

	if (current == seals)
		return;

	printf("%llu != %llu = GET_SEALS(%d)\n",
	       (unsigned long long)seals, (unsigned long long)current, fd);
	abort();
}
/*
 * Add @seals to @fd and abort if the kernel refuses. The previous seals
 * are read first so they can be shown in the failure message.
 */
static void mfd_assert_add_seals(int fd, __u64 seals)
{
	__u64 before = mfd_assert_get_seals(fd);
	long rc = fcntl(fd, F_ADD_SEALS, seals);

	if (rc >= 0)
		return;

	printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
	       fd, (unsigned long long)before, (unsigned long long)seals);
	abort();
}
/*
 * Try to add @seals to @fd while the object may still be busy. Success
 * and failure with EBUSY are both acceptable; any other error aborts.
 *
 * Return: the result of the F_ADD_SEALS fcntl().
 */
static int mfd_busy_add_seals(int fd, __u64 seals)
{
	__u64 before;
	long rc;

	/* The old seals are only used for the message; 0 if unreadable. */
	rc = fcntl(fd, F_GET_SEALS);
	before = (rc < 0) ? 0 : rc;

	rc = fcntl(fd, F_ADD_SEALS, seals);
	if (rc < 0 && errno != EBUSY) {
		printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
		       fd, (unsigned long long)before, (unsigned long long)seals);
		abort();
	}

	return rc;
}
/*
 * Map the first MFD_DEF_SIZE bytes of @fd shared and read-write; abort if
 * the mapping cannot be created.
 */
static void *mfd_assert_mmap_shared(int fd)
{
	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);

	if (map == MAP_FAILED) {
		printf("mmap() failed: %m\n");
		abort();
	}

	return map;
}
/*
 * Map the first MFD_DEF_SIZE bytes of @fd private and read-write; abort
 * if the mapping cannot be created.
 */
static void *mfd_assert_mmap_private(int fd)
{
	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE, fd, 0);

	if (map == MAP_FAILED) {
		printf("mmap() failed: %m\n");
		abort();
	}

	return map;
}
/*
 * memfd descriptor and its shared mapping, set by main() before the
 * sealing thread is spawned and read by sealing_thread_fn().
 */
static int global_mfd = -1;
static void *global_p = NULL;
/*
 * Body of the sealing thread.
 *
 * Waits 200ms so the read() in the parent is in flight, drops the shared
 * mapping and tries to add F_SEAL_WRITE to @global_mfd. While the parent's
 * read() still pins the pages this is expected to fail with EBUSY; in that
 * case the thread waits another second and seals again, which must then
 * succeed.
 *
 * Return: always 0; unexpected results abort the program.
 */
static int sealing_thread_fn(void *arg)
{
	int rc;

	/* wait 200ms for FUSE-request to be active */
	usleep(200000);

	/* unmap mapping before sealing to avoid i_mmap_writable failures */
	munmap(global_p, MFD_DEF_SIZE);

	/* Try sealing the global file; expect EBUSY or success. Current
	 * kernels will never succeed, but in the future, kernels might
	 * implement page-replacements or other fancy ways to avoid racing
	 * writes. */
	rc = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
	if (rc >= 0) {
		printf("HURRAY! This kernel fixed GUP races!\n");
		return 0;
	}

	/* wait 1s more so the FUSE-request is done */
	sleep(1);

	/* try sealing the global file again */
	mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);

	return 0;
}
/*
 * Start sealing_thread_fn() with clone(), sharing address space, file
 * table and fs information with the caller.
 *
 * The child's stack is allocated here and is not freed; the caller only
 * gets the pid back. Aborts if the stack or the child cannot be created.
 *
 * Return: pid of the new thread, to be passed to join_sealing_thread().
 */
static pid_t spawn_sealing_thread(void)
{
	uint8_t *stack;
	uintptr_t stack_top;
	pid_t pid;

	stack = malloc(STACK_SIZE);
	if (!stack) {
		printf("malloc(STACK_SIZE) failed: %m\n");
		abort();
	}

	/*
	 * clone() takes the topmost address of the stack. STACK_SIZE is
	 * odd, so stack + STACK_SIZE would be a misaligned stack pointer;
	 * round it down to a 16-byte boundary.
	 */
	stack_top = ((uintptr_t)stack + STACK_SIZE) & ~(uintptr_t)15;

	pid = clone(sealing_thread_fn,
		    (void *)stack_top,
		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
		    NULL);
	if (pid < 0) {
		printf("clone() failed: %m\n");
		abort();
	}

	return pid;
}
/* Block until the sealing thread @pid has exited; its status is ignored. */
static void join_sealing_thread(pid_t pid)
{
	(void)waitpid(pid, NULL, 0);
}
/*
 * GUP race test. argv[1] must name a file inside the fuse_mnt mount-point.
 *
 * The file is read into a shared mapping of a fresh memfd while a second
 * thread tries to seal that memfd against writes. Afterwards the memfd must
 * carry F_SEAL_WRITE, and its content must be consistent with whether the
 * seal was already in place when read() returned. Any violated expectation
 * aborts the program.
 */
int main(int argc, char **argv)
{
/* Reference buffer of zeroes to compare the memfd content against. */
static const char zero[MFD_DEF_SIZE];
int fd, mfd, r;
void *p;
int was_sealed;
pid_t pid;
if (argc < 2) {
printf("error: please pass path to file in fuse_mnt mount-point\n");
abort();
}
/* open FUSE memfd file for GUP testing */
printf("opening: %s\n", argv[1]);
fd = open(argv[1], O_RDONLY | O_CLOEXEC);
if (fd < 0) {
printf("cannot open(\"%s\"): %m\n", argv[1]);
abort();
}
/* create new memfd-object */
mfd = mfd_assert_new("kern_memfd_fuse",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
/* mmap memfd-object for writing */
p = mfd_assert_mmap_shared(mfd);
/* pass mfd+mapping to a separate sealing-thread which tries to seal
 * the memfd objects with SEAL_WRITE while we write into it */
global_mfd = mfd;
global_p = p;
pid = spawn_sealing_thread();
/* Use read() on the FUSE file to read into our memory-mapped memfd
 * object. This races the other thread which tries to seal the
 * memfd-object.
 * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
 * This guarantees that the receive-buffer is pinned for 1s until the
 * data is written into it. The racing ADD_SEALS should thus fail as
 * the pages are still pinned. */
r = read(fd, p, MFD_DEF_SIZE);
if (r < 0) {
printf("read() failed: %m\n");
abort();
} else if (!r) {
printf("unexpected EOF on read()\n");
abort();
}
/* Sample the seal state right after read() returned, before joining. */
was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
/* Wait for sealing-thread to finish and verify that it
 * successfully sealed the file after the second try. */
join_sealing_thread(pid);
mfd_assert_has_seals(mfd, F_SEAL_WRITE);
/* *IF* the memfd-object was sealed at the time our read() returned,
 * then the kernel did a page-replacement or canceled the read() (or
 * whatever magic it did..). In that case, the memfd object is still
 * all zero.
 * In case the memfd-object was *not* sealed, the read() was successful
 * and the memfd object must *not* be all zero.
 * Note that in real scenarios, there might be a mixture of both, but
 * in this test-cases, we have explicit 200ms delays which should be
 * enough to avoid any in-flight writes. */
/* The shared mapping was dropped by the sealing thread; map again. */
p = mfd_assert_mmap_private(mfd);
if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
printf("memfd sealed during read() but data not discarded\n");
abort();
} else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
printf("memfd sealed after read() but data discarded\n");
abort();
}
close(mfd);
close(fd);
printf("fuse: DONE\n");
return 0;
}

View file

@ -0,0 +1,911 @@
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/falloc.h>
#include <linux/fcntl.h>
#include <linux/memfd.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65535
/* Invoke the memfd_create system call directly through syscall(). */
static int sys_memfd_create(const char *name, unsigned int flags)
{
	int fd = syscall(__NR_memfd_create, name, flags);

	return fd;
}
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 * Aborts the program if either step fails; returns the new descriptor.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
/*
 * Check that creating a memfd called @name with @flags is rejected.
 * Aborts (after closing the descriptor) if the creation succeeds.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
/* Return the seals currently set on @fd; abort if they cannot be read. */
static unsigned int mfd_assert_get_seals(int fd)
{
	int seals = fcntl(fd, F_GET_SEALS);

	if (seals < 0) {
		printf("GET_SEALS(%d) failed: %m\n", fd);
		abort();
	}

	return (unsigned int)seals;
}
/* Abort unless the seals on @fd are exactly @seals. */
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
	unsigned int current = mfd_assert_get_seals(fd);

	if (current == seals)
		return;

	printf("%u != %u = GET_SEALS(%d)\n", seals, current, fd);
	abort();
}
/*
 * Add @seals to @fd and abort if the kernel refuses. The previous seals
 * are read first so they can be shown in the failure message.
 */
static void mfd_assert_add_seals(int fd, unsigned int seals)
{
	unsigned int before = mfd_assert_get_seals(fd);

	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, before, seals);
		abort();
	}
}
/*
 * Check that adding @seals to @fd is rejected; abort if the kernel accepts
 * them.
 */
static void mfd_fail_add_seals(int fd, unsigned int seals)
{
	unsigned int before;
	int rc;

	/* The old seals are only used for the message; 0 if unreadable. */
	rc = fcntl(fd, F_GET_SEALS);
	before = (rc < 0) ? 0 : (unsigned int)rc;

	rc = fcntl(fd, F_ADD_SEALS, seals);
	if (rc < 0)
		return;

	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
	       fd, before, seals);
	abort();
}
/* Abort unless fstat() reports exactly @size bytes for @fd. */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (sb.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)sb.st_size, (long long)size);
		abort();
	}
}
/* Duplicate @fd and return the new descriptor; abort if dup() fails. */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy < 0) {
		printf("dup(%d) failed: %m\n", fd);
		abort();
	}

	return copy;
}
/*
 * Map the first MFD_DEF_SIZE bytes of @fd shared and read-write; abort if
 * the mapping cannot be created.
 */
static void *mfd_assert_mmap_shared(int fd)
{
	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);

	if (map == MAP_FAILED) {
		printf("mmap() failed: %m\n");
		abort();
	}

	return map;
}
/*
 * Map the first MFD_DEF_SIZE bytes of @fd private and read-only; abort if
 * the mapping cannot be created.
 */
static void *mfd_assert_mmap_private(int fd)
{
	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);

	if (map == MAP_FAILED) {
		printf("mmap() failed: %m\n");
		abort();
	}

	return map;
}
/*
 * Open @fd a second time through its /proc/self/fd entry with @flags and
 * @mode. Returns the new descriptor; aborts if the open fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int newfd;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	newfd = open(proc_path, flags, mode);
	if (newfd < 0) {
		printf("open(%s) failed: %m\n", proc_path);
		abort();
	}

	return newfd;
}
/*
 * Check that re-opening @fd through its /proc/self/fd entry with @flags
 * and @mode is rejected; abort if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) >= 0) {
		printf("open(%s) didn't fail as expected\n", proc_path);
		abort();
	}
}
/*
 * Check that @fd is readable: a 16-byte read() must return in full, and
 * private mappings must work both read-only and writable. Any failure
 * aborts. Every mapping is released again.
 */
static void mfd_assert_read(int fd)
{
	/*
	 * PROT_READ *is* allowed, and MAP_PRIVATE is *always* allowed
	 * (even writable).
	 */
	static const int prots[] = { PROT_READ, PROT_READ | PROT_WRITE };
	char buf[16];
	ssize_t got;
	size_t i;

	got = read(fd, buf, sizeof(buf));
	if (got != sizeof(buf)) {
		printf("read() failed: %m\n");
		abort();
	}

	for (i = 0; i < sizeof(prots) / sizeof(prots[0]); i++) {
		void *map = mmap(NULL, MFD_DEF_SIZE, prots[i], MAP_PRIVATE,
				 fd, 0);

		if (map == MAP_FAILED) {
			printf("mmap() failed: %m\n");
			abort();
		}
		munmap(map, MFD_DEF_SIZE);
	}
}
/*
 * Verify that @fd can be modified: write(), writable shared mappings
 * (requested directly or obtained through mprotect()) and hole punching
 * must all succeed.  Any failure aborts the test.
 */
static void mfd_assert_write(int fd)
{
	/*
	 * First entry: verify PROT_READ | PROT_WRITE is allowed.
	 * Second entry: verify PROT_WRITE is allowed.
	 */
	static const int writable_prots[] = {
		PROT_READ | PROT_WRITE,
		PROT_WRITE,
	};
	void *map;
	size_t i;

	/* verify write() succeeds */
	if (write(fd, "\0\0\0\0", 4) != 4) {
		printf("write() failed: %m\n");
		abort();
	}

	for (i = 0; i < sizeof(writable_prots) / sizeof(writable_prots[0]); i++) {
		map = mmap(NULL, MFD_DEF_SIZE, writable_prots[i],
			   MAP_SHARED, fd, 0);
		if (map == MAP_FAILED) {
			printf("mmap() failed: %m\n");
			abort();
		}
		*(char *)map = 0;
		munmap(map, MFD_DEF_SIZE);
	}

	/*
	 * verify PROT_READ with MAP_SHARED is allowed and a following
	 * mprotect(PROT_WRITE) allows writing
	 */
	map = mmap(NULL, MFD_DEF_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		printf("mmap() failed: %m\n");
		abort();
	}
	if (mprotect(map, MFD_DEF_SIZE, PROT_READ | PROT_WRITE) < 0) {
		printf("mprotect() failed: %m\n");
		abort();
	}
	*(char *)map = 0;
	munmap(map, MFD_DEF_SIZE);

	/* verify PUNCH_HOLE works */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, MFD_DEF_SIZE) < 0) {
		printf("fallocate(PUNCH_HOLE) failed: %m\n");
		abort();
	}
}
/*
 * Verify that @fd rejects every way of modifying its contents: write(),
 * writable shared mappings (requested directly or through mprotect()) and
 * hole punching.  Aborts the test as soon as one of them succeeds.
 */
static void mfd_fail_write(int fd)
{
	ssize_t l;
	void *p;
	int r;

	/*
	 * verify write() fails with EPERM.  write() reports failure by
	 * returning -1 and setting errno, so the error code has to be read
	 * from errno; comparing the return value to -EPERM only appeared to
	 * work because EPERM is 1, and it accepted every other error too.
	 */
	l = write(fd, "data", 4);
	if (l >= 0 || errno != EPERM) {
		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
		abort();
	}

	/* verify PROT_READ | PROT_WRITE is not allowed */
	p = mmap(NULL,
		 MFD_DEF_SIZE,
		 PROT_READ | PROT_WRITE,
		 MAP_SHARED,
		 fd,
		 0);
	if (p != MAP_FAILED) {
		printf("mmap() didn't fail as expected\n");
		abort();
	}

	/* verify PROT_WRITE is not allowed */
	p = mmap(NULL,
		 MFD_DEF_SIZE,
		 PROT_WRITE,
		 MAP_SHARED,
		 fd,
		 0);
	if (p != MAP_FAILED) {
		printf("mmap() didn't fail as expected\n");
		abort();
	}

	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
	 * allowed. Note that for r/w the kernel already prevents the mmap. */
	p = mmap(NULL,
		 MFD_DEF_SIZE,
		 PROT_READ,
		 MAP_SHARED,
		 fd,
		 0);
	if (p != MAP_FAILED) {
		r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
		if (r >= 0) {
			printf("mmap()+mprotect() didn't fail as expected\n");
			abort();
		}
		/* Drop the read-only mapping so it does not outlive the check. */
		munmap(p, MFD_DEF_SIZE);
	}

	/* verify PUNCH_HOLE fails */
	r = fallocate(fd,
		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0,
		      MFD_DEF_SIZE);
	if (r >= 0) {
		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
		abort();
	}
}
/*
 * Verify that @fd can be shrunk: first with ftruncate() to half of
 * MFD_DEF_SIZE, then to zero by re-opening it with O_TRUNC.
 */
static void mfd_assert_shrink(int fd)
{
	int reopened;

	if (ftruncate(fd, MFD_DEF_SIZE / 2) < 0) {
		printf("ftruncate(SHRINK) failed: %m\n");
		abort();
	}
	mfd_assert_size(fd, MFD_DEF_SIZE / 2);

	/* The O_TRUNC re-open is expected to leave the file empty. */
	reopened = mfd_assert_open(fd, O_RDWR | O_CREAT | O_TRUNC,
				   S_IRUSR | S_IWUSR);
	close(reopened);
	mfd_assert_size(fd, 0);
}
/*
 * Verify that @fd cannot be shrunk, neither with ftruncate() nor by
 * re-opening it with O_TRUNC.
 */
static void mfd_fail_shrink(int fd)
{
	if (ftruncate(fd, MFD_DEF_SIZE / 2) >= 0) {
		printf("ftruncate(SHRINK) didn't fail as expected\n");
		abort();
	}

	mfd_fail_open(fd, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
}
/*
 * Verify that @fd can grow: ftruncate() to twice MFD_DEF_SIZE, then
 * fallocate() to four times MFD_DEF_SIZE, checking the size after each.
 */
static void mfd_assert_grow(int fd)
{
	if (ftruncate(fd, MFD_DEF_SIZE * 2) < 0) {
		printf("ftruncate(GROW) failed: %m\n");
		abort();
	}
	mfd_assert_size(fd, MFD_DEF_SIZE * 2);

	if (fallocate(fd, 0, 0, MFD_DEF_SIZE * 4) < 0) {
		printf("fallocate(ALLOC) failed: %m\n");
		abort();
	}
	mfd_assert_size(fd, MFD_DEF_SIZE * 4);
}
/*
 * Verify that @fd cannot grow, neither through ftruncate() nor through
 * fallocate().
 */
static void mfd_fail_grow(int fd)
{
	if (ftruncate(fd, MFD_DEF_SIZE * 2) >= 0) {
		printf("ftruncate(GROW) didn't fail as expected\n");
		abort();
	}

	if (fallocate(fd, 0, 0, MFD_DEF_SIZE * 4) >= 0) {
		printf("fallocate(ALLOC) didn't fail as expected\n");
		abort();
	}
}
/*
 * Verify that writing past the current end grows @fd: pwrite() a buffer of
 * eight times MFD_DEF_SIZE at offset 0 and check the resulting size.
 */
static void mfd_assert_grow_write(int fd)
{
	static char zeroes[MFD_DEF_SIZE * 8];
	ssize_t written = pwrite(fd, zeroes, sizeof(zeroes), 0);

	if (written != sizeof(zeroes)) {
		printf("pwrite() failed: %m\n");
		abort();
	}

	mfd_assert_size(fd, MFD_DEF_SIZE * 8);
}
/*
 * Verify that a pwrite() of eight times MFD_DEF_SIZE at offset 0 does not
 * complete in full; abort the test if the whole buffer was written.
 */
static void mfd_fail_grow_write(int fd)
{
	static char zeroes[MFD_DEF_SIZE * 8];
	ssize_t written = pwrite(fd, zeroes, sizeof(zeroes), 0);

	if (written != sizeof(zeroes))
		return;

	printf("pwrite() didn't fail as expected\n");
	abort();
}
/*
 * Entry point for the task created by spawn_idle_thread(): it does nothing
 * except wait in sigwait() for SIGTERM.  @arg is unused.
 */
static int idle_thread_fn(void *arg)
{
	sigset_t wait_mask;
	int received;

	/* dummy waiter; SIGTERM terminates us anyway */
	sigemptyset(&wait_mask);
	sigaddset(&wait_mask, SIGTERM);
	sigwait(&wait_mask, &received);

	return 0;
}
/*
 * Start a task running idle_thread_fn() via clone(), using SIGCHLD plus
 * the caller's clone @flags.  Returns the new task's pid; aborts the test
 * if the stack cannot be allocated or clone() fails.
 */
static pid_t spawn_idle_thread(unsigned int flags)
{
	uint8_t *stack_base = malloc(STACK_SIZE);
	pid_t child;

	if (!stack_base) {
		printf("malloc(STACK_SIZE) failed: %m\n");
		abort();
	}

	/* clone() is handed the end of the allocation as the stack pointer. */
	child = clone(idle_thread_fn, stack_base + STACK_SIZE,
		      SIGCHLD | flags, NULL);
	if (child < 0) {
		printf("clone() failed: %m\n");
		abort();
	}

	return child;
}
/*
 * Stop a task started by spawn_idle_thread(): send it SIGTERM and wait for
 * it with waitpid().  The results of both calls are ignored.
 */
static void join_idle_thread(pid_t pid)
{
kill(pid, SIGTERM);
waitpid(pid, NULL, 0);
}
/*
 * Test memfd_create() syscall
 * Verify syscall-argument validation, including name checks, flag validation
 * and more.
 *
 * Judging by their names, mfd_fail_new() expects the creation to be
 * rejected and mfd_assert_new() expects it to succeed (both helpers are
 * defined outside this part of the file).  Every descriptor obtained here
 * is closed again immediately.
 */
static void test_create(void)
{
char buf[2048];
int fd;
/* test NULL name */
mfd_fail_new(NULL, 0);
/* test over-long name (not zero-terminated) */
memset(buf, 0xff, sizeof(buf));
mfd_fail_new(buf, 0);
/* test over-long zero-terminated name */
memset(buf, 0xff, sizeof(buf));
buf[sizeof(buf) - 1] = 0;
mfd_fail_new(buf, 0);
/* verify "" is a valid name */
fd = mfd_assert_new("", 0, 0);
close(fd);
/* verify invalid O_* open flags */
mfd_fail_new("", 0x0100);
mfd_fail_new("", ~MFD_CLOEXEC);
mfd_fail_new("", ~MFD_ALLOW_SEALING);
mfd_fail_new("", ~0);
mfd_fail_new("", 0x80000000U);
/* verify MFD_CLOEXEC is allowed */
fd = mfd_assert_new("", 0, MFD_CLOEXEC);
close(fd);
/* verify MFD_ALLOW_SEALING is allowed */
fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
close(fd);
/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
close(fd);
}
/*
 * Test basic sealing
 * A very basic sealing test to see whether setting/retrieving seals works.
 *
 * The first half uses a memfd created with MFD_ALLOW_SEALING and checks
 * that seals accumulate, that re-adding existing seals is accepted and
 * that nothing can be added once F_SEAL_SEAL is set.  The second half
 * creates a memfd without MFD_ALLOW_SEALING and expects it to report
 * F_SEAL_SEAL from the start.
 */
static void test_basic(void)
{
int fd;
fd = mfd_assert_new("kern_memfd_basic",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
/* add basic seals */
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_SHRINK |
F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_SHRINK |
F_SEAL_WRITE);
/* add them again */
mfd_assert_add_seals(fd, F_SEAL_SHRINK |
F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_SHRINK |
F_SEAL_WRITE);
/* add more seals and seal against sealing */
mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
mfd_assert_has_seals(fd, F_SEAL_SHRINK |
F_SEAL_GROW |
F_SEAL_WRITE |
F_SEAL_SEAL);
/* verify that sealing no longer works */
mfd_fail_add_seals(fd, F_SEAL_GROW);
/* even an empty seal set is expected to be refused now */
mfd_fail_add_seals(fd, 0);
close(fd);
/* verify sealing does not work without MFD_ALLOW_SEALING */
fd = mfd_assert_new("kern_memfd_basic",
MFD_DEF_SIZE,
MFD_CLOEXEC);
mfd_assert_has_seals(fd, F_SEAL_SEAL);
mfd_fail_add_seals(fd, F_SEAL_SHRINK |
F_SEAL_GROW |
F_SEAL_WRITE);
/* the failed attempt must not have changed the seal set */
mfd_assert_has_seals(fd, F_SEAL_SEAL);
close(fd);
}
/*
 * Test SEAL_WRITE
 * Test whether SEAL_WRITE actually prevents modifications.
 *
 * With only F_SEAL_WRITE set, reading, shrinking and growing via
 * ftruncate()/fallocate() are expected to keep working, while writes and
 * growing through pwrite() are expected to fail.
 */
static void test_seal_write(void)
{
int fd;
fd = mfd_assert_new("kern_memfd_seal_write",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_WRITE);
/* the checks below run in this order because shrink/grow change the file size */
mfd_assert_read(fd);
mfd_fail_write(fd);
mfd_assert_shrink(fd);
mfd_assert_grow(fd);
mfd_fail_grow_write(fd);
close(fd);
}
/*
 * Test SEAL_SHRINK
 * Test whether SEAL_SHRINK actually prevents shrinking
 *
 * With only F_SEAL_SHRINK set, reading, writing and growing (both via
 * ftruncate()/fallocate() and via pwrite()) are expected to keep working;
 * only shrinking is expected to fail.
 */
static void test_seal_shrink(void)
{
int fd;
fd = mfd_assert_new("kern_memfd_seal_shrink",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_SHRINK);
mfd_assert_has_seals(fd, F_SEAL_SHRINK);
mfd_assert_read(fd);
mfd_assert_write(fd);
mfd_fail_shrink(fd);
mfd_assert_grow(fd);
mfd_assert_grow_write(fd);
close(fd);
}
/*
 * Test SEAL_GROW
 * Test whether SEAL_GROW actually prevents growing
 *
 * With only F_SEAL_GROW set, reading, writing and shrinking are expected
 * to keep working; growing via ftruncate()/fallocate() and via pwrite()
 * is expected to fail.
 */
static void test_seal_grow(void)
{
int fd;
fd = mfd_assert_new("kern_memfd_seal_grow",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_GROW);
mfd_assert_has_seals(fd, F_SEAL_GROW);
mfd_assert_read(fd);
mfd_assert_write(fd);
mfd_assert_shrink(fd);
mfd_fail_grow(fd);
mfd_fail_grow_write(fd);
close(fd);
}
/*
 * Test SEAL_SHRINK | SEAL_GROW
 * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
 *
 * With both size seals set, reading and writing within the existing size
 * are expected to keep working; every shrink or grow attempt is expected
 * to fail.
 */
static void test_seal_resize(void)
{
int fd;
fd = mfd_assert_new("kern_memfd_seal_resize",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
mfd_assert_read(fd);
mfd_assert_write(fd);
mfd_fail_shrink(fd);
mfd_fail_grow(fd);
mfd_fail_grow_write(fd);
close(fd);
}
/*
 * Test sharing via dup()
 * Test that seals are shared between dupped FDs and they're all equal.
 *
 * Seals are added alternately through the original descriptor and its
 * duplicate, and after every change both descriptors must report the same
 * seal set.
 */
static void test_share_dup(void)
{
int fd, fd2;
fd = mfd_assert_new("kern_memfd_share_dup",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
fd2 = mfd_assert_dup(fd);
mfd_assert_has_seals(fd2, 0);
/* a seal added through fd must show up on fd2 ... */
mfd_assert_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd2, F_SEAL_WRITE);
/* ... and the other way round */
mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
mfd_assert_add_seals(fd, F_SEAL_SEAL);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
/* once F_SEAL_SEAL is set, neither descriptor may add seals */
mfd_fail_add_seals(fd, F_SEAL_GROW);
mfd_fail_add_seals(fd2, F_SEAL_GROW);
mfd_fail_add_seals(fd, F_SEAL_SEAL);
mfd_fail_add_seals(fd2, F_SEAL_SEAL);
close(fd2);
/* closing the duplicate must not lift the restriction */
mfd_fail_add_seals(fd, F_SEAL_GROW);
close(fd);
}
/*
 * Test sealing with active mmap()s
 * Modifying seals is only allowed if no other mmap() refs exist.
 *
 * The test first holds a writable MAP_SHARED mapping (from
 * mfd_assert_mmap_shared()) and then a read-only MAP_PRIVATE mapping (from
 * mfd_assert_mmap_private()) while trying to add seals.
 */
static void test_share_mmap(void)
{
int fd;
void *p;
fd = mfd_assert_new("kern_memfd_share_mmap",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
/* shared/writable ref prevents sealing WRITE, but allows others */
p = mfd_assert_mmap_shared(fd);
mfd_fail_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, 0);
mfd_assert_add_seals(fd, F_SEAL_SHRINK);
mfd_assert_has_seals(fd, F_SEAL_SHRINK);
munmap(p, MFD_DEF_SIZE);
/* readable ref allows sealing */
p = mfd_assert_mmap_private(fd);
mfd_assert_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
munmap(p, MFD_DEF_SIZE);
close(fd);
}
/*
 * Test sealing with open(/proc/self/fd/%d)
 * Via /proc we can get access to a separate file-context for the same memfd.
 * This is *not* like dup(), but like a real separate open(). Make sure the
 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
 */
static void test_share_open(void)
{
int fd, fd2;
fd = mfd_assert_new("kern_memfd_share_open",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
/* a second O_RDWR open must see and be able to change the same seals */
fd2 = mfd_assert_open(fd, O_RDWR, 0);
mfd_assert_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd2, F_SEAL_WRITE);
mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
close(fd);
/* an O_RDONLY open can read the seals but is expected to fail adding one */
fd = mfd_assert_open(fd2, O_RDONLY, 0);
mfd_fail_add_seals(fd, F_SEAL_SEAL);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
close(fd2);
/* re-opening the read-only descriptor O_RDWR allows sealing again */
fd2 = mfd_assert_open(fd, O_RDWR, 0);
mfd_assert_add_seals(fd2, F_SEAL_SEAL);
mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
close(fd2);
close(fd);
}
/*
 * Test sharing via fork()
 * Test whether seal-modifications work as expected with forked childs.
 *
 * The child is created with spawn_idle_thread(0), i.e. clone() with no
 * extra sharing flags, after the memfd exists.  Seals are then changed in
 * the parent while the child is alive and checked again after it has been
 * joined.
 */
static void test_share_fork(void)
{
int fd;
pid_t pid;
fd = mfd_assert_new("kern_memfd_share_fork",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_has_seals(fd, 0);
pid = spawn_idle_thread(0);
mfd_assert_add_seals(fd, F_SEAL_SEAL);
mfd_assert_has_seals(fd, F_SEAL_SEAL);
mfd_fail_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_SEAL);
join_idle_thread(pid);
/* the result must be the same once the child is gone */
mfd_fail_add_seals(fd, F_SEAL_WRITE);
mfd_assert_has_seals(fd, F_SEAL_SEAL);
close(fd);
}
/*
 * Run every memfd test once, then repeat the sharing tests while an idle
 * task created with CLONE_FILES | CLONE_FS | CLONE_VM is alive.  Each test
 * aborts the process on failure, so reaching "DONE" means all passed.
 * @argc and @argv are unused.
 */
int main(int argc, char **argv)
{
pid_t pid;
printf("memfd: CREATE\n");
test_create();
printf("memfd: BASIC\n");
test_basic();
printf("memfd: SEAL-WRITE\n");
test_seal_write();
printf("memfd: SEAL-SHRINK\n");
test_seal_shrink();
printf("memfd: SEAL-GROW\n");
test_seal_grow();
printf("memfd: SEAL-RESIZE\n");
test_seal_resize();
printf("memfd: SHARE-DUP\n");
test_share_dup();
printf("memfd: SHARE-MMAP\n");
test_share_mmap();
printf("memfd: SHARE-OPEN\n");
test_share_open();
printf("memfd: SHARE-FORK\n");
test_share_fork();
/* Run test-suite in a multi-threaded environment with a shared
 * file-table. */
pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
printf("memfd: SHARE-DUP (shared file-table)\n");
test_share_dup();
printf("memfd: SHARE-MMAP (shared file-table)\n");
test_share_mmap();
printf("memfd: SHARE-OPEN (shared file-table)\n");
test_share_open();
printf("memfd: SHARE-FORK (shared file-table)\n");
test_share_fork();
join_idle_thread(pid);
printf("memfd: DONE\n");
return 0;
}

View file

@ -0,0 +1,14 @@
#!/bin/sh
# Mount the fuse_mnt helper on ./mnt, run fuse_test against ./mnt/memfd and
# unmount again.

# Remove a mount point left behind by an earlier run.  This happens before
# "set -e", so a failing fusermount or rmdir does not stop the script.
if [ -d ./mnt ]; then
	fusermount -u ./mnt
	rmdir ./mnt
fi

# From here on any failing command aborts the script.
set -e

mkdir mnt
./fuse_mnt ./mnt
./fuse_test ./mnt/memfd
fusermount -u ./mnt
rmdir ./mnt

View file

@ -0,0 +1,9 @@
# memory-hotplug selftests.  Nothing is built; the run targets invoke
# on-off-test.sh and print a [FAIL] line if the script exits non-zero.
all:
# Run the script with "-r 2" (its percent-of-memory-to-offline option).
run_tests:
@/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
# Run the script with its default options.
run_full_test:
@/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
clean:

View file

@ -0,0 +1,238 @@
#!/bin/bash
# Mount point of sysfs; filled in by prerequisite().
SYSFS=

# Check that the basic tests can run at all: the caller must be root, sysfs
# must be mounted and memory block directories must exist below it.
# Otherwise print the reason to stderr and exit with status 0 (skip).
prerequisite()
{
	msg="skip all tests:"

	if [ "$UID" != 0 ]; then
		echo "$msg must be run as root" >&2
		exit 0
	fi

	SYSFS=$(mount -t sysfs | head -1 | awk '{ print $3 }')
	if [ ! -d "$SYSFS" ]; then
		echo "$msg sysfs is not mounted" >&2
		exit 0
	fi

	if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
		echo "$msg memory hotplug is not supported" >&2
		exit 0
	fi
}
#
# list all hot-pluggable memory
#
# Print the number of every memory block under
# $SYSFS/devices/system/memory whose "removable" file contains a 1 and
# whose "state" file matches the regular expression given as $1.
# Without an argument every state matches.
#
hotpluggable_memory()
{
	local state=${1:-.\*}

	for memory in $SYSFS/devices/system/memory/memory*; do
		# "$state" is quoted so that the default pattern ".*" reaches
		# grep as a regular expression instead of being glob-expanded
		# against the current directory.
		if grep -q 1 "$memory/removable" &&
		   grep -q "$state" "$memory/state"; then
			echo ${memory##/*/memory}
		fi
	done
}
# Print the hot-pluggable memory blocks whose state matches "offline".
# The "hotplaggable" spelling is the name the rest of the script calls.
hotplaggable_offline_memory()
{
hotpluggable_memory offline
}
# Print the hot-pluggable memory blocks whose state matches "online".
hotpluggable_online_memory()
{
hotpluggable_memory online
}
# Succeed if the state file of memory block $1 contains "online".
memory_is_online()
{
grep -q online $SYSFS/devices/system/memory/memory$1/state
}
# Succeed if the state file of memory block $1 contains "offline".
memory_is_offline()
{
grep -q offline $SYSFS/devices/system/memory/memory$1/state
}
# Request onlining of memory block $1 by writing "online" to its state file.
# The exit status is that of the write.
online_memory()
{
echo online > $SYSFS/devices/system/memory/memory$1/state
}
# Request offlining of memory block $1 by writing "offline" to its state file.
# The exit status is that of the write.
offline_memory()
{
echo offline > $SYSFS/devices/system/memory/memory$1/state
}
# Online memory block $1 and complain on stderr if the request fails or the
# block is not online afterwards.
online_memory_expect_success()
{
	local block=$1

	if ! online_memory $block; then
		echo $FUNCNAME $block: unexpected fail >&2
		return
	fi

	memory_is_online $block || echo $FUNCNAME $block: unexpected offline >&2
}
# Try to online memory block $1, expecting the request to be refused.
# Complain on stderr if it succeeds or if the block is not offline afterwards.
online_memory_expect_fail()
{
	local block=$1

	if online_memory $block 2> /dev/null; then
		echo $FUNCNAME $block: unexpected success >&2
		return
	fi

	memory_is_offline $block || echo $FUNCNAME $block: unexpected online >&2
}
# Offline memory block $1 and complain on stderr if the request fails or the
# block is not offline afterwards.
offline_memory_expect_success()
{
	local memory=$1

	if ! offline_memory $memory; then
		echo $FUNCNAME $memory: unexpected fail >&2
	elif ! memory_is_offline $memory; then
		# The block was expected to be offline here, so what is
		# unexpected is that it is still online.
		echo $FUNCNAME $memory: unexpected online >&2
	fi
}
# Try to offline memory block $1, expecting the request to be refused.
# Complain on stderr if it succeeds or if the block is not online afterwards.
offline_memory_expect_fail()
{
	local block=$1

	if offline_memory $block 2> /dev/null; then
		echo $FUNCNAME $block: unexpected success >&2
		return
	fi

	memory_is_online $block || echo $FUNCNAME $block: unexpected offline >&2
}
# Defaults for the command-line options parsed below.
# error:    value written to the notifier error-injection files later on
#           (-12 is presumably -ENOMEM; confirm against errno definitions)
# priority: passed as "priority=" when loading memory-notifier-error-inject
# ratio:    percentage used when randomly choosing online blocks to offline
error=-12
priority=0
ratio=10
while getopts e:hp:r: opt; do
case $opt in
e)
error=$OPTARG
;;
h)
echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
exit
;;
p)
priority=$OPTARG
;;
r)
ratio=$OPTARG
;;
esac
done
# Reject error values outside the range -4095..-1.
if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
echo "error code must be -4095 <= errno < 0" >&2
exit 1
fi
# Basic pass without error injection.  prerequisite exits the script with
# status 0 when the tests cannot run.
prerequisite
echo "Test scope: $ratio% hotplug memory"
echo -e "\t online all hotplug memory in offline state"
echo -e "\t offline $ratio% hotplug memory in online state"
echo -e "\t online all hotplug memory in offline state"
#
# Online all hot-pluggable memory
#
for memory in `hotplaggable_offline_memory`; do
echo offline-online $memory
online_memory_expect_success $memory
done
#
# Offline $ratio percent of hot-pluggable memory
#
# Each online block is picked with a probability of $ratio percent.
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
echo online-offline $memory
offline_memory_expect_success $memory
fi
done
#
# Online all hot-pluggable memory again
#
for memory in `hotplaggable_offline_memory`; do
echo offline-online $memory
online_memory_expect_success $memory
done
#
# Test with memory notifier error injection
#

# Mount point of debugfs and the error-injection directory below it.
DEBUGFS=$(mount -t debugfs | head -1 | awk '{ print $3 }')
NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory

# Reload the memory-notifier-error-inject module with the requested
# priority, then check that debugfs and the injection directory exist.
# Otherwise print the reason to stderr and exit with status 0 (skip).
prerequisite_extra()
{
	msg="skip extra tests:"

	/sbin/modprobe -q -r memory-notifier-error-inject
	/sbin/modprobe -q memory-notifier-error-inject priority=$priority

	[ -d "$DEBUGFS" ] || {
		echo "$msg debugfs is not mounted" >&2
		exit 0
	}

	[ -d $NOTIFIER_ERR_INJECT_DIR ] || {
		echo "$msg memory-notifier-error-inject module is not available" >&2
		exit 0
	}
}

prerequisite_extra
#
# Offline $ratio percent of hot-pluggable memory
#
# Writing 0 to an "error" file disables injection for that notifier action,
# so these offline requests are expected to succeed.
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
offline_memory_expect_success $memory
fi
done
#
# Test memory hot-add error handling (offline => online)
#
# Inject $error for MEM_GOING_ONLINE: every online request must now fail.
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
for memory in `hotplaggable_offline_memory`; do
online_memory_expect_fail $memory
done
#
# Online all hot-pluggable memory
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
for memory in `hotplaggable_offline_memory`; do
online_memory_expect_success $memory
done
#
# Test memory hot-remove error handling (online => offline)
#
# Inject $error for MEM_GOING_OFFLINE: every offline request must now fail.
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
offline_memory_expect_fail $memory
done
# Clear the injected error and unload the module again.
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
/sbin/modprobe -q -r memory-notifier-error-inject

View file

@ -0,0 +1,17 @@
# Makefile for mount selftests.

all: unprivileged-remount-test

unprivileged-remount-test: unprivileged-remount-test.c
	gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test

# Allow specific tests to be selected.
# The test is only run when /proc/self/uid_map exists.
test_unprivileged_remount: unprivileged-remount-test
	@if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi

run_tests: all test_unprivileged_remount

clean:
	rm -f unprivileged-remount-test

# run_tests and clean do not create files of those names either, so they are
# declared phony as well; otherwise a stray file called "clean" or
# "run_tests" would stop the target from running.
.PHONY: all test_unprivileged_remount run_tests clean

View file

@ -0,0 +1,370 @@
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/vfs.h>
#include <sys/statvfs.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <grp.h>
#include <stdbool.h>
#include <stdarg.h>
/*
 * Fallback definitions: each constant is only defined here when the
 * headers included above did not already provide it.
 */
#ifndef CLONE_NEWNS
# define CLONE_NEWNS 0x00020000
#endif
#ifndef CLONE_NEWUTS
# define CLONE_NEWUTS 0x04000000
#endif
#ifndef CLONE_NEWIPC
# define CLONE_NEWIPC 0x08000000
#endif
#ifndef CLONE_NEWNET
# define CLONE_NEWNET 0x40000000
#endif
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWPID
# define CLONE_NEWPID 0x20000000
#endif
#ifndef MS_REC
# define MS_REC 16384
#endif
#ifndef MS_RELATIME
# define MS_RELATIME (1 << 21)
#endif
#ifndef MS_STRICTATIME
# define MS_STRICTATIME (1 << 24)
#endif
/*
 * Print a printf-style message to stderr and terminate the process with
 * EXIT_FAILURE.  Does not return.
 */
static void die(char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
/*
 * Format @fmt/@ap into a buffer and write the result to the existing file
 * @filename with a single write().
 *
 * When @enoent_ok is true a missing file (ENOENT on open) is silently
 * ignored.  Every other failure, including truncated formatting and a
 * short write, ends the process through die().
 */
static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
{
	char text[4096];
	ssize_t nwritten;
	int text_len;
	int fd;

	text_len = vsnprintf(text, sizeof(text), fmt, ap);
	if (text_len < 0)
		die("vsnprintf failed: %s\n", strerror(errno));
	if (text_len >= sizeof(text))
		die("vsnprintf output truncated\n");

	fd = open(filename, O_WRONLY);
	if (fd < 0) {
		if (enoent_ok && errno == ENOENT)
			return;
		die("open of %s failed: %s\n", filename, strerror(errno));
	}

	nwritten = write(fd, text, text_len);
	if (nwritten < 0)
		die("write to %s failed: %s\n", filename, strerror(errno));
	if (nwritten != text_len)
		die("short write to %s\n", filename);

	if (close(fd) != 0)
		die("close of %s failed: %s\n", filename, strerror(errno));
}
/*
 * Write a formatted string to @filename; a file that does not exist is
 * skipped without error.
 */
static void maybe_write_file(char *filename, char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmaybe_write_file(true, filename, fmt, args);
	va_end(args);
}
/*
 * Write a formatted string to @filename; the file must exist, a missing
 * file is treated like any other failure.
 */
static void write_file(char *filename, char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmaybe_write_file(false, filename, fmt, args);
	va_end(args);
}
/*
 * Return the mount flags of the filesystem containing @path, expressed as
 * MS_* values suitable for mount().
 *
 * The flags are read with statvfs() and each known ST_* bit is translated
 * to its MS_* counterpart.  The process is terminated through die() when
 * statvfs() fails or reports a flag this function does not know.
 */
static int read_mnt_flags(const char *path)
{
	int ret;
	struct statvfs stat;
	int mnt_flags;

	ret = statvfs(path, &stat);
	if (ret != 0) {
		die("statvfs of %s failed: %s\n",
			path, strerror(errno));
	}
	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
			ST_SYNCHRONOUS | ST_MANDLOCK)) {
		die("Unrecognized mount flags\n");
	}

	mnt_flags = 0;
	if (stat.f_flag & ST_RDONLY)
		mnt_flags |= MS_RDONLY;
	if (stat.f_flag & ST_NOSUID)
		mnt_flags |= MS_NOSUID;
	if (stat.f_flag & ST_NODEV)
		mnt_flags |= MS_NODEV;
	if (stat.f_flag & ST_NOEXEC)
		mnt_flags |= MS_NOEXEC;
	if (stat.f_flag & ST_NOATIME)
		mnt_flags |= MS_NOATIME;
	if (stat.f_flag & ST_NODIRATIME)
		mnt_flags |= MS_NODIRATIME;
	if (stat.f_flag & ST_RELATIME)
		mnt_flags |= MS_RELATIME;
	if (stat.f_flag & ST_SYNCHRONOUS)
		mnt_flags |= MS_SYNCHRONOUS;
	/* The result is a set of MS_* flags, so use MS_MANDLOCK here too. */
	if (stat.f_flag & ST_MANDLOCK)
		mnt_flags |= MS_MANDLOCK;

	return mnt_flags;
}
/*
 * Move the calling process into a new user namespace in which the caller's
 * current uid and gid are mapped to 0, then switch to gid 0 and uid 0
 * inside it.  Any failure ends the process through die().
 */
static void create_and_enter_userns(void)
{
	/* Record the ids before unshare(); they go into the map files below. */
	uid_t outer_uid = getuid();
	gid_t outer_gid = getgid();

	if (unshare(CLONE_NEWUSER) != 0)
		die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));

	/* /proc/self/setgroups may not exist; that case is tolerated. */
	maybe_write_file("/proc/self/setgroups", "deny");
	write_file("/proc/self/uid_map", "0 %d 1", outer_uid);
	write_file("/proc/self/gid_map", "0 %d 1", outer_gid);

	if (setgid(0) != 0)
		die("setgid(0) failed %s\n", strerror(errno));
	if (setuid(0) != 0)
		die("setuid(0) failed %s\n", strerror(errno));
}
/*
 * Run one remount scenario in a forked child and report whether it passed.
 *
 * The child enters a new user and mount namespace and mounts @fstype on
 * /tmp with @mount_flags and @mount_options.  It then enters a second user
 * and mount namespace, where a bind-remount of /tmp with @remount_flags
 * must succeed and one with @invalid_flags must fail.
 *
 * Returns true when the child exited with EXIT_SUCCESS.  Failures of
 * fork()/waitpid() or an abnormal child termination end the calling
 * process through die().
 */
static
bool test_unpriv_remount(const char *fstype, const char *mount_options,
			 int mount_flags, int remount_flags, int invalid_flags)
{
	pid_t child = fork();

	if (child == -1)
		die("fork failed: %s\n", strerror(errno));

	if (child != 0) { /* parent */
		int status;
		pid_t reaped = waitpid(child, &status, 0);

		if (reaped == -1)
			die("waitpid failed: %s\n", strerror(errno));
		if (reaped != child)
			die("waited for %d got %d\n", child, reaped);
		if (!WIFEXITED(status))
			die("child did not terminate cleanly\n");

		return WEXITSTATUS(status) == EXIT_SUCCESS;
	}

	/* child: first namespace pair, where the filesystem gets mounted */
	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0)
		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));

	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0)
		die("mount of %s with options '%s' on /tmp failed: %s\n",
		    fstype,
		    mount_options ? mount_options : "",
		    strerror(errno));

	/* child: second namespace pair, where the remounts are attempted */
	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0)
		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));

	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp failed: %s\n", strerror(errno));
	}

	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp with invalid flags "
		    "succeeded unexpectedly\n");
	}

	exit(EXIT_SUCCESS);
}
/*
 * Run test_unpriv_remount() on ramfs, mounting and remounting with the
 * same @mount_flags and passing 0 as the flags whose remount must fail.
 */
static bool test_unpriv_remount_simple(int mount_flags)
{
return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
}
/*
 * Run test_unpriv_remount() on ramfs, mounting and remounting with the
 * same @mount_flags; a remount with @invalid_flags must fail.
 */
static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
{
return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
invalid_flags);
}
/*
 * Check, in a forked child, that a mount inherited from the parent
 * namespace keeps its flags across an unprivileged bind-remount.
 *
 * The child records the mount flags of /dev, enters a new user and mount
 * namespace, recursively bind-mounts /dev onto /tmp, remounts /tmp with
 * the recorded flags and verifies that /tmp then reports the same flags.
 *
 * Returns true when the child exited with EXIT_SUCCESS.  Failures of
 * fork()/waitpid() or an abnormal child termination end the calling
 * process through die().
 */
static bool test_priv_mount_unpriv_remount(void)
{
	const char *orig_path = "/dev";
	const char *dest_path = "/tmp";
	int flags_before, flags_after;
	pid_t child;

	child = fork();
	if (child == -1)
		die("fork failed: %s\n", strerror(errno));

	if (child != 0) { /* parent */
		int status;
		pid_t reaped = waitpid(child, &status, 0);

		if (reaped == -1)
			die("waitpid failed: %s\n", strerror(errno));
		if (reaped != child)
			die("waited for %d got %d\n", child, reaped);
		if (!WIFEXITED(status))
			die("child did not terminate cleanly\n");

		return WEXITSTATUS(status) == EXIT_SUCCESS;
	}

	/* child: the flags are read before the namespaces are entered */
	flags_before = read_mnt_flags(orig_path);

	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0)
		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));

	if (mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL) != 0)
		die("recursive bind mount of %s onto %s failed: %s\n",
		    orig_path, dest_path, strerror(errno));

	if (mount(dest_path, dest_path, "none",
		  MS_REMOUNT | MS_BIND | flags_before, NULL) != 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp failed: %s\n", strerror(errno));
	}

	flags_after = read_mnt_flags(dest_path);
	if (flags_before != flags_after)
		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
		    dest_path, orig_path);

	exit(EXIT_SUCCESS);
}
/*
 * Run every remount scenario in turn.  The first failing scenario ends the
 * program through die() with a message naming the flags involved; when all
 * pass, EXIT_SUCCESS is returned.  @argc and @argv are unused.
 */
int main(int argc, char **argv)
{
	if (!test_unpriv_remount_simple(MS_RDONLY)) {
		die("MS_RDONLY malfunctions\n");
	}
	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
		die("MS_NODEV malfunctions\n");
	}
	if (!test_unpriv_remount_simple(MS_NOSUID)) {
		die("MS_NOSUID malfunctions\n");
	}
	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
		die("MS_NOEXEC malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_RELATIME,
				       MS_NOATIME))
	{
		die("MS_RELATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_STRICTATIME,
				       MS_NOATIME))
	{
		die("MS_STRICTATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_NOATIME,
				       MS_STRICTATIME))
	{
		die("MS_NOATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
				       MS_NOATIME))
	{
		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
				       MS_NOATIME))
	{
		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
				       MS_STRICTATIME))
	{
		/* Name the flag that is actually tested (MS_NODIRATIME). */
		die("MS_NOATIME|MS_NODIRATIME malfunctions\n");
	}
	/* Mounted with MS_STRICTATIME, remounted without any atime flag. */
	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
	{
		die("Default atime malfunctions\n");
	}
	if (!test_priv_mount_unpriv_remount()) {
		die("Mount flags unexpectedly changed after remount\n");
	}
	return EXIT_SUCCESS;
}

View file

@ -0,0 +1,10 @@
# mqueue selftests: build both test programs, run them, remove them.
# mq_open_tests is linked with -lrt; mq_perf_tests additionally with
# -lpthread and -lpopt.
all:
gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
# Each program prints a [FAIL] line if it exits non-zero.  mq_open_tests is
# given /test1 as the queue path to create.
run_tests:
@./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
@./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
clean:
rm -f mq_open_tests mq_perf_tests

View file

@ -0,0 +1,500 @@
/*
* This application is Copyright 2012 Red Hat, Inc.
* Doug Ledford <dledford@redhat.com>
*
* mq_open_tests is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* mq_open_tests is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* For the full text of the license, see <http://www.gnu.org/licenses/>.
*
* mq_open_tests.c
* Tests the various situations that should either succeed or fail to
* open a posix message queue and then reports whether or not they
* did as they were supposed to.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <mqueue.h>
/* Usage text; %s is the placeholder for the program name. */
static char *usage =
"Usage:\n"
" %s path\n"
"\n"
" path Path name of the message queue to create\n"
"\n"
" Note: this program must be run as root in order to enable all tests\n"
"\n";
/* /proc files holding the default and maximum message queue parameters. */
char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default";
char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default";
char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
/* Non-zero when the msg_default/msgsize_default values are in use (see shutdown()). */
int default_settings;
struct rlimit saved_limits, cur_limits;
/* saved_*: values restored by shutdown(); cur_*: values currently in effect. */
int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
/* Streams used to read and write the four settings above. */
FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
/* Name of the queue under test and its descriptor (-1 while not open). */
char *queue_path;
mqd_t queue = -1;
/* Forward declarations of the helpers defined below. */
static inline void __set(FILE *stream, int value, char *err_msg);
void shutdown(int exit_val, char *err_cause, int line_no);
static inline int get(FILE *stream);
static inline void set(FILE *stream, int value);
static inline void getr(int type, struct rlimit *rlim);
static inline void setr(int type, struct rlimit *rlim);
void validate_current_settings();
static inline void test_queue(struct mq_attr *attr, struct mq_attr *result);
static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result);
/*
 * Rewind @stream and print @value to it as a decimal number.  A failing
 * fprintf() is reported with perror(@err_msg); it is not fatal.
 */
static inline void __set(FILE *stream, int value, char *err_msg)
{
	int printed;

	rewind(stream);
	printed = fprintf(stream, "%d", value);
	if (printed < 0) {
		perror(err_msg);
	}
}
void shutdown(int exit_val, char *err_cause, int line_no)
{
static int in_shutdown = 0;
/* In case we get called recursively by a set() call below */
if (in_shutdown++)
return;
if (seteuid(0) == -1)
perror("seteuid() failed");
if (queue != -1)
if (mq_close(queue))
perror("mq_close() during shutdown");
if (queue_path)
/*
* Be silent if this fails, if we cleaned up already it's
* expected to fail
*/
mq_unlink(queue_path);
if (default_settings) {
if (saved_def_msgs)
__set(def_msgs, saved_def_msgs,
"failed to restore saved_def_msgs");
if (saved_def_msgsize)
__set(def_msgsize, saved_def_msgsize,
"failed to restore saved_def_msgsize");
}
if (saved_max_msgs)
__set(max_msgs, saved_max_msgs,
"failed to restore saved_max_msgs");
if (saved_max_msgsize)
__set(max_msgsize, saved_max_msgsize,
"failed to restore saved_max_msgsize");
if (exit_val)
error(exit_val, errno, "%s at %d", err_cause, line_no);
exit(0);
}
/*
 * Rewind @stream and read one decimal integer from it.  Returns the value
 * read; a failed read goes to shutdown() with exit value 4.
 */
static inline int get(FILE *stream)
{
	int parsed;

	rewind(stream);
	if (fscanf(stream, "%d", &parsed) != 1) {
		shutdown(4, "Error reading /proc entry", __LINE__ - 1);
	}

	return parsed;
}
/*
 * Write @value to @stream and read it back to confirm that the new value
 * took effect.  A failed write or a mismatching read-back goes to
 * shutdown() with exit value 5.
 *
 * shutdown() returns void, so it is called as a statement followed by a
 * plain return; ISO C does not allow "return expr;" in a void function.
 */
static inline void set(FILE *stream, int value)
{
	int new_value;

	rewind(stream);
	if (fprintf(stream, "%d", value) < 0) {
		shutdown(5, "Failed writing to /proc file", __LINE__ - 1);
		return;
	}

	new_value = get(stream);
	if (new_value != value) {
		shutdown(5, "We didn't get what we wrote to /proc back", __LINE__ - 1);
		return;
	}
}
/*
 * Read resource limit @type into @rlim with getrlimit(); a failure goes to
 * shutdown() with exit value 6.
 */
static inline void getr(int type, struct rlimit *rlim)
{
	if (getrlimit(type, rlim) != 0) {
		shutdown(6, "getrlimit()", __LINE__ - 1);
	}
}
/*
 * Apply @rlim as resource limit @type with setrlimit(); a failure goes to
 * shutdown() with exit value 7.
 */
static inline void setr(int type, struct rlimit *rlim)
{
	if (setrlimit(type, rlim) != 0) {
		shutdown(7, "setrlimit()", __LINE__ - 1);
	}
}
/*
 * Make sure the current RLIMIT_MSGQUEUE limit and the queue parameters in
 * use fit together.
 *
 * A soft limit below 4096 is raised to 8192 (hard limit 16384).  Then the
 * space needed by the default parameters (or, when default_settings is
 * zero, the maximum parameters) is compared with the soft limit, and the
 * parameters are lowered to 10 messages of 128 bytes if they need more.
 */
void validate_current_settings()
{
	int rlim_needed;

	if (cur_limits.rlim_cur < 4096) {
		printf("Current rlimit value for POSIX message queue bytes is "
		       "unreasonably low,\nincreasing.\n\n");
		cur_limits.rlim_cur = 8192;
		cur_limits.rlim_max = 16384;
		setr(RLIMIT_MSGQUEUE, &cur_limits);
	}

	if (default_settings) {
		rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 +
						    2 * sizeof(void *));
		if (!(rlim_needed > cur_limits.rlim_cur))
			return;

		printf("Temporarily lowering default queue parameters "
		       "to something that will work\n"
		       "with the current rlimit values.\n\n");
		set(def_msgs, 10);
		cur_def_msgs = 10;
		set(def_msgsize, 128);
		cur_def_msgsize = 128;
		return;
	}

	rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 +
					    2 * sizeof(void *));
	if (!(rlim_needed > cur_limits.rlim_cur))
		return;

	printf("Temporarily lowering maximum queue parameters "
	       "to something that will work\n"
	       "with the current rlimit values in case this is "
	       "a kernel that ties the default\n"
	       "queue parameters to the maximum queue "
	       "parameters.\n\n");
	set(max_msgs, 10);
	cur_max_msgs = 10;
	set(max_msgsize, 128);
	cur_max_msgsize = 128;
}
/*
 * test_queue - create the test queue, capture its attributes, remove it
 * @attr:   attributes handed to mq_open(), or NULL
 * @result: filled in by mq_getattr() with the attributes of the new queue
 *
 * Only for opens that are never supposed to fail: any failing step goes
 * to shutdown() with exit status 1.  The queue is closed and unlinked
 * again before returning and the global descriptor is reset to -1.
 */
static inline void test_queue(struct mq_attr *attr, struct mq_attr *result)
{
	const int open_flags = O_RDWR | O_EXCL | O_CREAT;
	const int mode = DEFFILEMODE;

	queue = mq_open(queue_path, open_flags, mode, attr);
	if (queue == -1)
		shutdown(1, "mq_open()", __LINE__);
	if (mq_getattr(queue, result) != 0)
		shutdown(1, "mq_getattr()", __LINE__);
	if (mq_close(queue) != 0)
		shutdown(1, "mq_close()", __LINE__);
	queue = -1;
	if (mq_unlink(queue_path) != 0)
		shutdown(1, "mq_unlink()", __LINE__);
}
/*
 * test_queue_fail - like test_queue(), but a failed open is tolerated
 * @attr:   attributes handed to mq_open(), or NULL
 * @result: filled in by mq_getattr() when the queue could be created
 *
 * Returns:
 *   0 - the queue could not be created (*result is left untouched)
 *   1 - the queue was created, its attributes are in *result
 *
 * Failures after a successful open are still fatal (exit status 1).
 */
static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result)
{
	const int open_flags = O_RDWR | O_EXCL | O_CREAT;
	const int mode = DEFFILEMODE;

	queue = mq_open(queue_path, open_flags, mode, attr);
	if (queue == -1)
		return 0;
	if (mq_getattr(queue, result) != 0)
		shutdown(1, "mq_getattr()", __LINE__);
	if (mq_close(queue) != 0)
		shutdown(1, "mq_close()", __LINE__);
	queue = -1;
	if (mq_unlink(queue_path) != 0)
		shutdown(1, "mq_unlink()", __LINE__);
	return 1;
}
/*
 * main - run the mq_open() limit tests against the queue named in argv[1]
 *
 * Requires exactly one argument (the queue name) and uid 0.  Saves the
 * current rlimit and /proc tunables, adjusts them as each test series
 * needs, prints one PASS/FAIL line per check and finally restores the
 * saved state through shutdown().
 */
int main(int argc, char *argv[])
{
	struct mq_attr attr, result;

	if (argc != 2) {
		fprintf(stderr, "Must pass a valid queue name\n\n");
		fprintf(stderr, usage, argv[0]);
		exit(1);
	}

	/*
	 * Although we can create a msg queue with a non-absolute path name,
	 * unlink will fail.  So, if the name doesn't start with a /, add one
	 * when we save it.
	 */
	if (*argv[1] == '/') {
		queue_path = strdup(argv[1]);
		/* strdup() allocates, so it can fail just like malloc() */
		if (!queue_path) {
			perror("strdup()");
			exit(1);
		}
	} else {
		queue_path = malloc(strlen(argv[1]) + 2);
		if (!queue_path) {
			perror("malloc()");
			exit(1);
		}
		queue_path[0] = '/';
		queue_path[1] = 0;
		strcat(queue_path, argv[1]);
	}

	if (getuid() != 0) {
		fprintf(stderr, "Not running as root, but almost all tests "
			"require root in order to modify\nsystem settings. "
			"Exiting.\n");
		exit(1);
	}

	/* Find out what files there are for us to make tweaks in */
	def_msgs = fopen(DEF_MSGS, "r+");
	def_msgsize = fopen(DEF_MSGSIZE, "r+");
	max_msgs = fopen(MAX_MSGS, "r+");
	max_msgsize = fopen(MAX_MSGSIZE, "r+");
	if (!max_msgs)
		shutdown(2, "Failed to open msg_max", __LINE__);
	if (!max_msgsize)
		shutdown(2, "Failed to open msgsize_max", __LINE__);
	/*
	 * Both default knobs are read and written below, so both must be
	 * open before we treat the kernel as supporting them.
	 */
	if (def_msgs && def_msgsize)
		default_settings = 1;

	/* Load up the current system values for everything we can */
	getr(RLIMIT_MSGQUEUE, &saved_limits);
	cur_limits = saved_limits;
	if (default_settings) {
		saved_def_msgs = cur_def_msgs = get(def_msgs);
		saved_def_msgsize = cur_def_msgsize = get(def_msgsize);
	}
	saved_max_msgs = cur_max_msgs = get(max_msgs);
	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);

	/* Tell the user our initial state */
	printf("\nInitial system state:\n");
	printf("\tUsing queue path:\t\t%s\n", queue_path);
	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n",
	       (long) saved_limits.rlim_cur);
	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n",
	       (long) saved_limits.rlim_max);
	printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize);
	printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs);
	if (default_settings) {
		printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize);
		printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs);
	} else {
		printf("\tDefault Message Size:\t\tNot Supported\n");
		printf("\tDefault Queue Size:\t\tNot Supported\n");
	}
	printf("\n");

	validate_current_settings();

	printf("Adjusted system state for testing:\n");
	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n", (long) cur_limits.rlim_cur);
	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n", (long) cur_limits.rlim_max);
	printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize);
	printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs);
	if (default_settings) {
		printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize);
		printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs);
	}

	printf("\n\nTest series 1, behavior when no attr struct "
	       "passed to mq_open:\n");
	if (!default_settings) {
		test_queue(NULL, &result);
		printf("Given sane system settings, mq_open without an attr "
		       "struct succeeds:\tPASS\n");
		if (result.mq_maxmsg != cur_max_msgs ||
		    result.mq_msgsize != cur_max_msgsize) {
			printf("Kernel does not support setting the default "
			       "mq attributes,\nbut also doesn't tie the "
			       "defaults to the maximums:\t\t\tPASS\n");
		} else {
			/* Bump the maximums and see whether defaults follow */
			set(max_msgs, ++cur_max_msgs);
			set(max_msgsize, ++cur_max_msgsize);
			test_queue(NULL, &result);
			if (result.mq_maxmsg == cur_max_msgs &&
			    result.mq_msgsize == cur_max_msgsize)
				printf("Kernel does not support setting the "
				       "default mq attributes and\n"
				       "also ties system wide defaults to "
				       "the system wide maximums:\t\t"
				       "FAIL\n");
			else
				printf("Kernel does not support setting the "
				       "default mq attributes,\n"
				       "but also doesn't tie the defaults to "
				       "the maximums:\t\t\tPASS\n");
		}
	} else {
		printf("Kernel supports setting defaults separately from "
		       "maximums:\t\tPASS\n");
		/*
		 * While we are here, go ahead and test that the kernel
		 * properly follows the default settings
		 */
		test_queue(NULL, &result);
		printf("Given sane values, mq_open without an attr struct "
		       "succeeds:\t\tPASS\n");
		if (result.mq_maxmsg != cur_def_msgs ||
		    result.mq_msgsize != cur_def_msgsize)
			printf("Kernel supports setting defaults, but does "
			       "not actually honor them:\tFAIL\n\n");
		else {
			set(def_msgs, ++cur_def_msgs);
			set(def_msgsize, ++cur_def_msgsize);
			/* In case max was the same as the default */
			set(max_msgs, ++cur_max_msgs);
			set(max_msgsize, ++cur_max_msgsize);
			test_queue(NULL, &result);
			if (result.mq_maxmsg != cur_def_msgs ||
			    result.mq_msgsize != cur_def_msgsize)
				printf("Kernel supports setting defaults, but "
				       "does not actually honor them:\t"
				       "FAIL\n");
			else
				printf("Kernel properly honors default setting "
				       "knobs:\t\t\t\tPASS\n");
		}

		/* Push the defaults above the maximums */
		set(def_msgs, cur_max_msgs + 1);
		cur_def_msgs = cur_max_msgs + 1;
		set(def_msgsize, cur_max_msgsize + 1);
		cur_def_msgsize = cur_max_msgsize + 1;
		if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >=
		    cur_limits.rlim_cur) {
			cur_limits.rlim_cur = (cur_def_msgs + 2) *
				(cur_def_msgsize + 2 * sizeof(void *));
			cur_limits.rlim_max = 2 * cur_limits.rlim_cur;
			setr(RLIMIT_MSGQUEUE, &cur_limits);
		}
		if (test_queue_fail(NULL, &result)) {
			if (result.mq_maxmsg == cur_max_msgs &&
			    result.mq_msgsize == cur_max_msgsize)
				printf("Kernel properly limits default values "
				       "to lesser of default/max:\t\tPASS\n");
			else
				printf("Kernel does not properly set default "
				       "queue parameters when\ndefaults > "
				       "max:\t\t\t\t\t\t\t\tFAIL\n");
		} else
			printf("Kernel fails to open mq because defaults are "
			       "greater than maximums:\tFAIL\n");

		/* Defaults back to the maximums, rlimit too small for them */
		set(def_msgs, --cur_def_msgs);
		set(def_msgsize, --cur_def_msgsize);
		cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs *
			cur_def_msgsize;
		setr(RLIMIT_MSGQUEUE, &cur_limits);
		if (test_queue_fail(NULL, &result))
			printf("Kernel creates queue even though defaults "
			       "would exceed\nrlimit setting:"
			       "\t\t\t\t\t\t\t\tFAIL\n");
		else
			printf("Kernel properly fails to create queue when "
			       "defaults would\nexceed rlimit:"
			       "\t\t\t\t\t\t\t\tPASS\n");
	}

	/*
	 * Test #2 - open with an attr struct that exceeds rlimit
	 */
	printf("\n\nTest series 2, behavior when attr struct is "
	       "passed to mq_open:\n");
	cur_max_msgs = 32;
	cur_max_msgsize = cur_limits.rlim_max >> 4;
	set(max_msgs, cur_max_msgs);
	set(max_msgsize, cur_max_msgsize);
	attr.mq_maxmsg = cur_max_msgs;
	attr.mq_msgsize = cur_max_msgsize;
	if (test_queue_fail(&attr, &result))
		printf("Queue open in excess of rlimit max when euid = 0 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open in excess of rlimit max when euid = 0 "
		       "failed:\t\tPASS\n");
	attr.mq_maxmsg = cur_max_msgs + 1;
	attr.mq_msgsize = 10;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with mq_maxmsg > limit when euid = 0 "
		       "succeeded:\t\tPASS\n");
	else
		printf("Queue open with mq_maxmsg > limit when euid = 0 "
		       "failed:\t\tFAIL\n");
	attr.mq_maxmsg = 1;
	attr.mq_msgsize = cur_max_msgsize + 1;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with mq_msgsize > limit when euid = 0 "
		       "succeeded:\t\tPASS\n");
	else
		printf("Queue open with mq_msgsize > limit when euid = 0 "
		       "failed:\t\tFAIL\n");
	attr.mq_maxmsg = 65536;
	attr.mq_msgsize = 65536;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with total size > 2GB when euid = 0 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open with total size > 2GB when euid = 0 "
		       "failed:\t\t\tPASS\n");

	/* Repeat the same four opens with an unprivileged effective uid */
	if (seteuid(99) == -1) {
		perror("seteuid() failed");
		exit(1);
	}

	attr.mq_maxmsg = cur_max_msgs;
	attr.mq_msgsize = cur_max_msgsize;
	if (test_queue_fail(&attr, &result))
		printf("Queue open in excess of rlimit max when euid = 99 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open in excess of rlimit max when euid = 99 "
		       "failed:\t\tPASS\n");
	attr.mq_maxmsg = cur_max_msgs + 1;
	attr.mq_msgsize = 10;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with mq_maxmsg > limit when euid = 99 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open with mq_maxmsg > limit when euid = 99 "
		       "failed:\t\tPASS\n");
	attr.mq_maxmsg = 1;
	attr.mq_msgsize = cur_max_msgsize + 1;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with mq_msgsize > limit when euid = 99 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open with mq_msgsize > limit when euid = 99 "
		       "failed:\t\tPASS\n");
	attr.mq_maxmsg = 65536;
	attr.mq_msgsize = 65536;
	if (test_queue_fail(&attr, &result))
		printf("Queue open with total size > 2GB when euid = 99 "
		       "succeeded:\t\tFAIL\n");
	else
		printf("Queue open with total size > 2GB when euid = 99 "
		       "failed:\t\t\tPASS\n");

	/* Restores the saved settings and exits with status 0 */
	shutdown(0,"",0);
}

View file

@ -0,0 +1,743 @@
/*
* This application is Copyright 2012 Red Hat, Inc.
* Doug Ledford <dledford@redhat.com>
*
* mq_perf_tests is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* mq_perf_tests is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* For the full text of the license, see <http://www.gnu.org/licenses/>.
*
* mq_perf_tests.c
* Tests various types of message queue workloads, concentrating on those
* situations that involve large message sizes, large message queue depths,
* or both, and reports back useful metrics about kernel message queue
* performance.
*
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <error.h>
#include <signal.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <mqueue.h>
#include <popt.h>
/*
 * Free-form usage text; the single %s is a placeholder for the program
 * name.
 * NOTE(review): nothing in the code visible in this file references this
 * string (command line help appears to come from the popt table), and the
 * text describes a positional "path" rather than the -p option - confirm
 * whether it is still needed.
 */
static char *usage =
"Usage:\n"
" %s [-c #[,#..] -f] path\n"
"\n"
" -c # Skip most tests and go straight to a high queue depth test\n"
" and then run that test continuously (useful for running at\n"
" the same time as some other workload to see how much the\n"
" cache thrashing caused by adding messages to a very deep\n"
" queue impacts the performance of other programs). The number\n"
" indicates which CPU core we should bind the process to during\n"
" the run. If you have more than one physical CPU, then you\n"
" will need one copy per physical CPU package, and you should\n"
" specify the CPU cores to pin ourself to via a comma separated\n"
" list of CPU values.\n"
" -f Only usable with continuous mode. Pin ourself to the CPUs\n"
" as requested, then instead of looping doing a high mq\n"
" workload, just busy loop. This will allow us to lock up a\n"
" single CPU just like we normally would, but without actually\n"
" thrashing the CPU cache. This is to make it easier to get\n"
" comparable numbers from some other workload running on the\n"
" other CPUs. One set of numbers with # CPUs locked up running\n"
" an mq workload, and another set of numbers with those same\n"
" CPUs locked away from the test workload, but not doing\n"
" anything to trash the cache like the mq workload might.\n"
" path Path name of the message queue to create\n"
"\n"
" Note: this program must be run as root in order to enable all tests\n"
"\n";
/* /proc files holding the system-wide mqueue limits; main() opens both "r+" */
char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
/* Smaller of two values; note that an argument may be evaluated twice */
#define min(a, b) ((a) < (b) ? (a) : (b))
/* Capacity of the cpus_to_pin[] and cpu_threads[] arrays below */
#define MAX_CPUS 64
/* Raw argument of -c (comma separated CPU list), stored by popt */
char *cpu_option_string;
/* CPUs the worker threads get pinned to, and how many entries are valid */
int cpus_to_pin[MAX_CPUS];
int num_cpus_to_pin;
/* One worker thread per entry of cpus_to_pin[] */
pthread_t cpu_threads[MAX_CPUS];
/* Thread that runs main(); the signal handlers compare against it */
pthread_t main_thread;
/* Dynamically sized CPU set (CPU_ALLOC) and its size in bytes */
cpu_set_t *cpu_set;
int cpu_set_size;
/* Number of online CPUs, capped at MAX_CPUS */
int cpus_online;
/* Size in bytes of every message sent by the tests */
#define MSG_SIZE 16
/* Iteration counts for test #1 and for each test #2 variant */
#define TEST1_LOOPS 10000000
#define TEST2_LOOPS 100000
/* Set by -c: run the continuous workload instead of the timed tests */
int continuous_mode;
/* Set by -f: worker threads only busy loop */
int continuous_mode_fake;
/* RLIMIT_MSGQUEUE as found at startup and as currently set */
struct rlimit saved_limits, cur_limits;
/* msg_max / msgsize_max as found at startup (restored by shutdown()) */
int saved_max_msgs, saved_max_msgsize;
/* msg_max / msgsize_max as currently set */
int cur_max_msgs, cur_max_msgsize;
/* Open streams for the two /proc files above */
FILE *max_msgs, *max_msgsize;
/* Nice value of the process */
int cur_nice;
/* Name of the test queue; can be overridden with -p */
char *queue_path = "/mq_perf_tests";
/* Descriptor of the test queue, -1 while no queue is open */
mqd_t queue = -1;
/* Attributes reported by mq_getattr() for the open queue */
struct mq_attr result;
/* Value of sysconf(_SC_MQ_PRIO_MAX), set by perf_test_thread() */
int mq_prio_max;
/*
 * Command line options understood by the program, in popt table form.
 * -c and -p return their short option character from poptGetNextOpt()
 * so main() can post-process the argument; -f only stores a flag.
 */
const struct poptOption options[] = {
	{
		.longName = "continuous",
		.shortName = 'c',
		.argInfo = POPT_ARG_STRING,
		.arg = &cpu_option_string,
		.val = 'c',
		.descrip = "Run continuous tests at a high queue depth in "
			"order to test the effects of cache thrashing on "
			"other tasks on the system. This test is intended "
			"to be run on one core of each physical CPU while "
			"some other CPU intensive task is run on all the other "
			"cores of that same physical CPU and the other task "
			"is timed. It is assumed that the process of adding "
			"messages to the message queue in a tight loop will "
			"impact that other task to some degree. Once the "
			"tests are performed in this way, you should then "
			"re-run the tests using fake mode in order to check "
			"the difference in time required to perform the CPU "
			"intensive task",
		.argDescrip = "cpu[,cpu]",
	},
	{
		.longName = "fake",
		.shortName = 'f',
		.argInfo = POPT_ARG_NONE,
		.arg = &continuous_mode_fake,
		.val = 0,
		/* First fragment ends in a space so the words don't run together */
		.descrip = "Tie up the CPUs that we would normally tie up in "
			"continuous mode, but don't actually do any mq stuff, "
			"just keep the CPU busy so it can't be used to process "
			"system level tasks as this would free up resources on "
			"the other CPU cores and skew the comparison between "
			"the no-mqueue work and mqueue work tests",
		.argDescrip = NULL,
	},
	{
		.longName = "path",
		.shortName = 'p',
		.argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
		.arg = &queue_path,
		.val = 'p',
		.descrip = "The name of the path to use in the mqueue "
			"filesystem for our tests",
		.argDescrip = "pathname",
	},
	POPT_AUTOHELP
	POPT_TABLEEND
};
/* Forward declarations for the helpers defined further down in this file */
static inline void __set(FILE *stream, int value, char *err_msg);
void shutdown(int exit_val, char *err_cause, int line_no);
void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
void sig_action(int signum, siginfo_t *info, void *context);
static inline int get(FILE *stream);
static inline void set(FILE *stream, int value);
static inline int try_set(FILE *stream, int value);
static inline void getr(int type, struct rlimit *rlim);
static inline void setr(int type, struct rlimit *rlim);
static inline void open_queue(struct mq_attr *attr);
void increase_limits(void);
/*
 * __set - best-effort write of an integer to an already-open stream
 * @stream:  stream to overwrite from its beginning (must not be NULL)
 * @value:   integer written in decimal
 * @err_msg: prefix handed to perror() if the write fails
 *
 * Failures are reported but not fatal, which is what shutdown() needs.
 * The explicit fflush() matters: fprintf() only fills the stdio buffer,
 * so an error from the underlying write is not seen until the flush.
 */
static inline void __set(FILE *stream, int value, char *err_msg)
{
	rewind(stream);
	if (fprintf(stream, "%d", value) < 0 || fflush(stream) == EOF)
		perror(err_msg);
}
/*
 * shutdown - stop the worker threads, undo system changes, then exit
 * @exit_val:  process exit status; non-zero also prints a diagnostic
 * @err_cause: description of what failed (used only when exit_val != 0)
 * @line_no:   source line reported together with @err_cause
 *
 * Returns to the caller only when a shutdown is already in progress;
 * otherwise it terminates the process.
 */
void shutdown(int exit_val, char *err_cause, int line_no)
{
	static int in_shutdown = 0;
	int errno_at_shutdown = errno;
	int i;

	/* In case we get called by multiple threads or from an sighandler */
	if (in_shutdown++)
		return;

	for (i = 0; i < num_cpus_to_pin; i++) {
		if (!cpu_threads[i])
			continue;
		/*
		 * Never signal or join ourselves: when a worker thread ends
		 * up here, SIGUSR1 would make it exit inside the handler
		 * before any of the cleanup below has run, and nobody else
		 * could redo it because in_shutdown is already set.
		 */
		if (pthread_equal(cpu_threads[i], pthread_self()))
			continue;
		pthread_kill(cpu_threads[i], SIGUSR1);
		pthread_join(cpu_threads[i], NULL);
	}

	if (queue != -1)
		if (mq_close(queue))
			perror("mq_close() during shutdown");
	if (queue_path)
		/*
		 * Be silent if this fails, if we cleaned up already it's
		 * expected to fail
		 */
		mq_unlink(queue_path);

	/* A saved value of 0 means it was never read, so nothing to restore */
	if (saved_max_msgs)
		__set(max_msgs, saved_max_msgs,
		      "failed to restore saved_max_msgs");
	if (saved_max_msgsize)
		__set(max_msgsize, saved_max_msgsize,
		      "failed to restore saved_max_msgsize");

	if (exit_val)
		error(exit_val, errno_at_shutdown, "%s at %d",
		      err_cause, line_no);
	exit(0);
}
/*
 * sig_action_SIGUSR1 - SIGUSR1 handler
 *
 * In any thread other than the main thread the signal simply ends that
 * thread.  In the main thread it triggers a normal shutdown.
 */
void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
{
	if (pthread_self() != main_thread)
		pthread_exit(0);	/* does not return */

	fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
		"exiting\n", signum);
	shutdown(0, "", 0);
	fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
	exit(0);
}
/*
 * sig_action - handler installed by main() for SIGHUP, SIGINT, SIGQUIT
 * and SIGTERM
 *
 * A thread other than the main thread forwards the signal to the main
 * thread; the main thread itself performs the shutdown.
 */
void sig_action(int signum, siginfo_t *info, void *context)
{
	if (pthread_self() != main_thread) {
		pthread_kill(main_thread, signum);
		return;
	}

	fprintf(stderr, "Caught signal %d, exiting\n", signum);
	shutdown(0, "", 0);
	fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
	exit(0);
}
/*
 * get - fetch the integer currently stored in a /proc tunable
 * @stream: open stream for the tunable; it is rewound before reading
 *
 * Returns the parsed value.  A parse failure is handed to shutdown()
 * with exit status 4.
 */
static inline int get(FILE *stream)
{
	int parsed;

	rewind(stream);
	if (fscanf(stream, "%d", &parsed) == 1)
		return parsed;
	shutdown(4, "Error reading /proc entry", __LINE__);
	return parsed;
}
/*
 * set - write an integer to a /proc tunable and verify it took effect
 * @stream: open read/write stream for the tunable
 * @value:  value to store
 *
 * The value is read back through get(); a write error or a mismatch on
 * read-back goes to shutdown() with exit status 5.
 */
static inline void set(FILE *stream, int value)
{
	int new_value;

	rewind(stream);
	if (fprintf(stream, "%d", value) < 0) {
		/* A void function must not return an expression (ISO C) */
		shutdown(5, "Failed writing to /proc file", __LINE__);
		return;
	}
	new_value = get(stream);
	if (new_value != value)
		shutdown(5, "We didn't get what we wrote to /proc back",
			 __LINE__);
}
/*
 * try_set - attempt to store a value in a /proc tunable
 * @stream: open read/write stream for the tunable
 * @value:  value to store
 *
 * Unlike set(), a value that does not stick is not an error here.
 * Returns non-zero when reading the tunable back yields @value, zero
 * otherwise.
 */
static inline int try_set(FILE *stream, int value)
{
	rewind(stream);
	fprintf(stream, "%d", value);
	return get(stream) == value;
}
/*
 * getr - getrlimit() wrapper that treats failure as fatal
 * @type: resource to query
 * @rlim: receives the current limits
 *
 * On failure shutdown() is called with exit status 6.
 */
static inline void getr(int type, struct rlimit *rlim)
{
	if (getrlimit(type, rlim) == 0)
		return;
	shutdown(6, "getrlimit()", __LINE__);
}
/*
 * setr - setrlimit() wrapper that treats failure as fatal
 * @type: resource to change
 * @rlim: limits to install
 *
 * On failure shutdown() is called with exit status 7.
 */
static inline void setr(int type, struct rlimit *rlim)
{
	if (setrlimit(type, rlim) == 0)
		return;
	shutdown(7, "setrlimit()", __LINE__);
}
/**
 * open_queue - open the global queue for testing
 * @attr - An attr struct specifying the desired queue traits
 *
 * This open is not allowed to fail, failure will result in an orderly
 * shutdown of the program.  The global queue_path is used to set what
 * queue to open, the queue descriptor is saved in the global queue
 * variable and the traits the queue actually has are stored in the
 * global result struct and printed.
 */
static inline void open_queue(struct mq_attr *attr)
{
	int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
	int perms = DEFFILEMODE;

	queue = mq_open(queue_path, flags, perms, attr);
	if (queue == -1)
		shutdown(1, "mq_open()", __LINE__);
	if (mq_getattr(queue, &result))
		shutdown(1, "mq_getattr()", __LINE__);
	printf("\n\tQueue %s created:\n", queue_path);
	printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
	       "O_NONBLOCK" : "(null)");
	/* The mq_attr members are signed, so print them as such */
	printf("\t\tmq_maxmsg:\t\t\t%ld\n", (long)result.mq_maxmsg);
	printf("\t\tmq_msgsize:\t\t\t%ld\n", (long)result.mq_msgsize);
	printf("\t\tmq_curmsgs:\t\t\t%ld\n", (long)result.mq_curmsgs);
}
/*
 * fake_cont_thread - worker for fake continuous mode
 * @arg: unused
 *
 * Looks up its own slot in cpu_threads[] to report which CPU it was
 * started for, then spins forever without touching the message queue.
 * Never returns.
 */
void *fake_cont_thread(void *arg)
{
	int slot = 0;

	while (slot < num_cpus_to_pin &&
	       !(cpu_threads[slot] == pthread_self()))
		slot++;
	printf("\tStarted fake continuous mode thread %d on CPU %d\n", slot,
	       cpus_to_pin[slot]);
	for (;;)
		;
}
/*
 * cont_thread - worker for continuous mode
 * @arg: unused
 *
 * Looks up its own slot in cpu_threads[] to report which CPU it was
 * started for, then loops forever: send until mq_send() fails, receive
 * one message, repeat.  Never returns.
 */
void *cont_thread(void *arg)
{
	char buff[MSG_SIZE];
	int i;
	/* mq_receive() stores the priority through an unsigned pointer */
	unsigned int priority;

	/* The payload is irrelevant, but don't send indeterminate bytes */
	memset(buff, 0, sizeof(buff));

	for (i = 0; i < num_cpus_to_pin; i++)
		if (cpu_threads[i] == pthread_self())
			break;
	printf("\tStarted continuous mode thread %d on CPU %d\n", i,
	       cpus_to_pin[i]);
	while (1) {
		while (mq_send(queue, buff, sizeof(buff), 0) == 0)
			;
		mq_receive(queue, buff, sizeof(buff), &priority);
	}
}
/*
 * The three macros below are meant to be expanded inside a function that
 * declares the locals they name: buff, prio_out, prio_in, clock, start,
 * middle, end, nsec, send_total and recv_total.
 */
/*
 * drain_queue - receive until mq_receive() stops returning MSG_SIZE.
 * Expands to a while header without a body, so the caller's trailing
 * ';' becomes the (empty) loop body.
 */
#define drain_queue() \
while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
/* do_untimed_send - send one message, a failure shuts the program down */
#define do_untimed_send() \
do { \
if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
shutdown(3, "Test send failure", __LINE__); \
} while (0)
/*
 * do_send_recv - send one message and receive one, timing each half.
 * The elapsed nanoseconds are added to send_total / recv_total; a
 * tv_nsec that reaches one second is carried over into tv_sec (at most
 * one carry per invocation).
 */
#define do_send_recv() \
do { \
clock_gettime(clock, &start); \
if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
shutdown(3, "Test send failure", __LINE__); \
clock_gettime(clock, &middle); \
if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
shutdown(3, "Test receive failure", __LINE__); \
clock_gettime(clock, &end); \
nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
(middle.tv_nsec - start.tv_nsec); \
send_total.tv_nsec += nsec; \
if (send_total.tv_nsec >= 1000000000) { \
send_total.tv_sec++; \
send_total.tv_nsec -= 1000000000; \
} \
nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
(end.tv_nsec - middle.tv_nsec); \
recv_total.tv_nsec += nsec; \
if (recv_total.tv_nsec >= 1000000000) { \
recv_total.tv_sec++; \
recv_total.tv_nsec -= 1000000000; \
} \
} while (0)
/* One timed test variant: a heading to print and a priority update rule */
struct test {
/* Text printed before the variant runs */
char *desc;
/* Called after every send to pick the priority of the next message */
void (*func)(int *);
};
/* const_prio - priority rule that leaves *prio unchanged */
void const_prio(int *prio)
{
	(void)prio;
}
/* inc_prio - step *prio up by one, wrapping to 0 on reaching mq_prio_max */
void inc_prio(int *prio)
{
	*prio += 1;
	if (*prio == mq_prio_max)
		*prio = 0;
}
/* dec_prio - step *prio down by one, wrapping to mq_prio_max - 1 below 0 */
void dec_prio(int *prio)
{
	*prio -= 1;
	if (*prio < 0)
		*prio = mq_prio_max - 1;
}
/* random_prio - set *prio to random() reduced modulo mq_prio_max */
void random_prio(int *prio)
{
	long draw = random();

	*prio = draw % mq_prio_max;
}
/*
 * Variants of test #2, terminated by an all-NULL entry.  The headings do
 * not end in a newline: perf_test_thread() prints them with "%s:\n", so
 * a trailing newline here would leave the colon alone on the next line.
 */
struct test test2[] = {
	{"\n\tTest #2a: Time send/recv message, queue full, constant prio",
		const_prio},
	{"\n\tTest #2b: Time send/recv message, queue full, increasing prio",
		inc_prio},
	{"\n\tTest #2c: Time send/recv message, queue full, decreasing prio",
		dec_prio},
	{"\n\tTest #2d: Time send/recv message, queue full, random prio",
		random_prio},
	{NULL, NULL}
};
/**
 * perf_test_thread - run the timed send/receive tests
 * @arg: unused
 *
 * Tests performed (all done with MSG_SIZE messages):
 *
 * 1) Time to add/remove message with 0 messages on queue
 *      1a) with constant prio
 * 2) Time to add/remove message when queue close to capacity:
 *      2a) with constant prio
 *      2b) with increasing prio
 *      2c) with decreasing prio
 *      2d) with random prio
 *
 * Times are taken from the CPU-time clock of cpu_threads[0].  Returns 0
 * once all tests ran; any failure goes through shutdown().
 */
void *perf_test_thread(void *arg)
{
	char buff[MSG_SIZE];
	int prio_out;
	/* mq_receive() stores the priority through an unsigned pointer */
	unsigned int prio_in;
	int i;
	clockid_t clock;
	struct timespec res, start, middle, end, send_total, recv_total;
	unsigned long long nsec;
	struct test *cur_test;

	/* The payload is irrelevant, but don't send indeterminate bytes */
	memset(buff, 0, sizeof(buff));

	printf("\n\tStarted mqueue performance test thread on CPU %d\n",
	       cpus_to_pin[0]);
	mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
	if (mq_prio_max == -1)
		shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
	if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
		shutdown(2, "pthread_getcpuclockid", __LINE__);

	if (clock_getres(clock, &res))
		shutdown(2, "clock_getres()", __LINE__);

	printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
	printf("\t\tClock resolution:\t\t%ld nsec%s\n", (long)res.tv_nsec,
	       res.tv_nsec > 1 ? "s" : "");

	printf("\n\tTest #1: Time send/recv message, queue empty\n");
	printf("\t\t(%d iterations)\n", TEST1_LOOPS);
	prio_out = 0;
	send_total.tv_sec = 0;
	send_total.tv_nsec = 0;
	recv_total.tv_sec = 0;
	recv_total.tv_nsec = 0;
	for (i = 0; i < TEST1_LOOPS; i++)
		do_send_recv();
	/*
	 * The nanosecond part is a fraction of a second and therefore has
	 * to be zero padded to nine digits, otherwise 1s + 5000000ns would
	 * be shown as "1.5000000".
	 */
	printf("\t\tSend msg:\t\t\t%ld.%09lds total time\n",
	       (long)send_total.tv_sec, (long)send_total.tv_nsec);
	nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
		 send_total.tv_nsec) / TEST1_LOOPS;
	printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
	printf("\t\tRecv msg:\t\t\t%ld.%09lds total time\n",
	       (long)recv_total.tv_sec, (long)recv_total.tv_nsec);
	nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
		recv_total.tv_nsec) / TEST1_LOOPS;
	printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);

	for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
		printf("%s:\n", cur_test->desc);
		printf("\t\t(%d iterations)\n", TEST2_LOOPS);
		prio_out = 0;
		send_total.tv_sec = 0;
		send_total.tv_nsec = 0;
		recv_total.tv_sec = 0;
		recv_total.tv_nsec = 0;
		printf("\t\tFilling queue...");
		fflush(stdout);
		clock_gettime(clock, &start);
		/* Leave one free slot for the send half of do_send_recv() */
		for (i = 0; i < result.mq_maxmsg - 1; i++) {
			do_untimed_send();
			cur_test->func(&prio_out);
		}
		clock_gettime(clock, &end);
		nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
			1000000000) + (end.tv_nsec - start.tv_nsec);
		printf("done.\t\t%llu.%09llus\n", nsec / 1000000000,
		       nsec % 1000000000);
		printf("\t\tTesting...");
		fflush(stdout);
		for (i = 0; i < TEST2_LOOPS; i++) {
			do_send_recv();
			cur_test->func(&prio_out);
		}
		printf("done.\n");
		printf("\t\tSend msg:\t\t\t%ld.%09lds total time\n",
		       (long)send_total.tv_sec, (long)send_total.tv_nsec);
		nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
			 send_total.tv_nsec) / TEST2_LOOPS;
		printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
		printf("\t\tRecv msg:\t\t\t%ld.%09lds total time\n",
		       (long)recv_total.tv_sec, (long)recv_total.tv_nsec);
		nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
			recv_total.tv_nsec) / TEST2_LOOPS;
		printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
		printf("\t\tDraining queue...");
		fflush(stdout);
		clock_gettime(clock, &start);
		drain_queue();
		clock_gettime(clock, &end);
		nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
			1000000000) + (end.tv_nsec - start.tv_nsec);
		printf("done.\t\t%llu.%09llus\n", nsec / 1000000000,
		       nsec % 1000000000);
	}
	return 0;
}
/*
 * increase_limits - open up every limit the tests run into
 *
 * Sets RLIMIT_MSGQUEUE to unlimited, raises msg_max in steps of 10 and
 * msgsize_max in steps of 1024 until a step is no longer accepted, reads
 * back the values that are then in effect, and finally sets the nice
 * value of the process to -20.
 */
void increase_limits(void)
{
	cur_limits.rlim_max = RLIM_INFINITY;
	cur_limits.rlim_cur = RLIM_INFINITY;
	setr(RLIMIT_MSGQUEUE, &cur_limits);

	do {
		cur_max_msgs += 10;
	} while (try_set(max_msgs, cur_max_msgs));
	cur_max_msgs = get(max_msgs);

	do {
		cur_max_msgsize += 1024;
	} while (try_set(max_msgsize, cur_max_msgsize));
	cur_max_msgsize = get(max_msgsize);

	if (setpriority(PRIO_PROCESS, 0, -20) != 0)
		shutdown(2, "setpriority()", __LINE__);
	cur_nice = -20;
}
/*
 * main - parse options, raise the system limits and start the workers
 *
 * Without -c a single thread pinned to the last online CPU runs the
 * timed tests and the program exits when it is done.  With -c one thread
 * per requested CPU runs the continuous (or, with -f, fake) workload
 * until the process is signalled.  Must be run with uid 0.
 */
int main(int argc, char *argv[])
{
	struct mq_attr attr;
	char *option, *next_option;
	int i, cpu;
	struct sigaction sa;
	poptContext popt_context;
	/*
	 * poptGetNextOpt() returns an int and uses negative values for
	 * "done" and for errors; a plain char cannot hold those on
	 * platforms where char is unsigned.
	 */
	int rc;
	void *retval;

	main_thread = pthread_self();
	num_cpus_to_pin = 0;

	if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
		perror("sysconf(_SC_NPROCESSORS_ONLN)");
		exit(1);
	}
	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
	cpu_set = CPU_ALLOC(cpus_online);
	if (cpu_set == NULL) {
		perror("CPU_ALLOC()");
		exit(1);
	}
	cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
	CPU_ZERO_S(cpu_set_size, cpu_set);

	popt_context = poptGetContext(NULL, argc, (const char **)argv,
				      options, 0);

	while ((rc = poptGetNextOpt(popt_context)) > 0) {
		switch (rc) {
		case 'c':
			continuous_mode = 1;
			option = cpu_option_string;
			do {
				next_option = strchr(option, ',');
				if (next_option)
					*next_option = '\0';
				cpu = atoi(option);
				if (cpu < 0)
					/* Would index before the CPU set */
					fprintf(stderr, "CPU %d is not a "
						"valid CPU number, "
						"ignoring.\n", cpu);
				else if (cpu >= cpus_online)
					fprintf(stderr, "CPU %d exceeds "
						"cpus online, ignoring.\n",
						cpu);
				else
					cpus_to_pin[num_cpus_to_pin++] = cpu;
				if (next_option)
					option = ++next_option;
			} while (next_option && num_cpus_to_pin < MAX_CPUS);
			/* Double check that they didn't give us the same CPU
			 * more than once */
			for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
				if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
						cpu_set)) {
					fprintf(stderr, "Any given CPU may "
						"only be given once.\n");
					exit(1);
				} else
					CPU_SET_S(cpus_to_pin[cpu],
						  cpu_set_size, cpu_set);
			}
			break;
		case 'p':
			/*
			 * Although we can create a msg queue with a
			 * non-absolute path name, unlink will fail.  So,
			 * if the name doesn't start with a /, add one
			 * when we save it.
			 */
			option = queue_path;
			if (*option != '/') {
				queue_path = malloc(strlen(option) + 2);
				if (!queue_path) {
					perror("malloc()");
					exit(1);
				}
				queue_path[0] = '/';
				queue_path[1] = 0;
				strcat(queue_path, option);
				free(option);
			}
			break;
		}
	}

	if (continuous_mode && num_cpus_to_pin == 0) {
		fprintf(stderr, "Must pass at least one CPU to continuous "
			"mode.\n");
		poptPrintUsage(popt_context, stderr, 0);
		exit(1);
	} else if (!continuous_mode) {
		num_cpus_to_pin = 1;
		cpus_to_pin[0] = cpus_online - 1;
	}

	if (getuid() != 0) {
		fprintf(stderr, "Not running as root, but almost all tests "
			"require root in order to modify\nsystem settings. "
			"Exiting.\n");
		exit(1);
	}

	max_msgs = fopen(MAX_MSGS, "r+");
	max_msgsize = fopen(MAX_MSGSIZE, "r+");
	if (!max_msgs)
		shutdown(2, "Failed to open msg_max", __LINE__);
	if (!max_msgsize)
		shutdown(2, "Failed to open msgsize_max", __LINE__);

	/* Load up the current system values for everything we can */
	getr(RLIMIT_MSGQUEUE, &saved_limits);
	cur_limits = saved_limits;
	saved_max_msgs = cur_max_msgs = get(max_msgs);
	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
	/* -1 is a valid nice value, so errno is the only error indicator */
	errno = 0;
	cur_nice = getpriority(PRIO_PROCESS, 0);
	if (errno)
		shutdown(2, "getpriority()", __LINE__);

	/* Tell the user our initial state */
	printf("\nInitial system state:\n");
	printf("\tUsing queue path:\t\t\t%s\n", queue_path);
	printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
	       (long) saved_limits.rlim_cur);
	printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
	       (long) saved_limits.rlim_max);
	printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
	printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
	printf("\n");

	increase_limits();

	printf("Adjusted system state for testing:\n");
	if (cur_limits.rlim_cur == RLIM_INFINITY) {
		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
	} else {
		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
		       (long) cur_limits.rlim_cur);
		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
		       (long) cur_limits.rlim_max);
	}
	printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
	printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
	printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
	       (continuous_mode_fake ? "fake mode" : "enabled") :
	       "disabled");
	printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
	for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
			printf(",%d", cpus_to_pin[cpu]);
	printf("\n");

	/* SIGUSR1 ends worker threads; the others trigger a shutdown */
	sa.sa_sigaction = sig_action_SIGUSR1;
	sigemptyset(&sa.sa_mask);
	sigaddset(&sa.sa_mask, SIGHUP);
	sigaddset(&sa.sa_mask, SIGINT);
	sigaddset(&sa.sa_mask, SIGQUIT);
	sigaddset(&sa.sa_mask, SIGTERM);
	sa.sa_flags = SA_SIGINFO;
	if (sigaction(SIGUSR1, &sa, NULL) == -1)
		shutdown(1, "sigaction(SIGUSR1)", __LINE__);
	sa.sa_sigaction = sig_action;
	if (sigaction(SIGHUP, &sa, NULL) == -1)
		shutdown(1, "sigaction(SIGHUP)", __LINE__);
	if (sigaction(SIGINT, &sa, NULL) == -1)
		shutdown(1, "sigaction(SIGINT)", __LINE__);
	if (sigaction(SIGQUIT, &sa, NULL) == -1)
		shutdown(1, "sigaction(SIGQUIT)", __LINE__);
	if (sigaction(SIGTERM, &sa, NULL) == -1)
		shutdown(1, "sigaction(SIGTERM)", __LINE__);

	if (!continuous_mode_fake) {
		attr.mq_flags = O_NONBLOCK;
		attr.mq_maxmsg = cur_max_msgs;
		attr.mq_msgsize = MSG_SIZE;
		open_queue(&attr);
	}
	for (i = 0; i < num_cpus_to_pin; i++) {
		pthread_attr_t thread_attr;
		void *thread_func;

		if (continuous_mode_fake)
			thread_func = &fake_cont_thread;
		else if (continuous_mode)
			thread_func = &cont_thread;
		else
			thread_func = &perf_test_thread;

		/* Each thread is bound to exactly one of the chosen CPUs */
		CPU_ZERO_S(cpu_set_size, cpu_set);
		CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
		pthread_attr_init(&thread_attr);
		pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
					    cpu_set);
		if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
				   NULL))
			shutdown(1, "pthread_create()", __LINE__);
		pthread_attr_destroy(&thread_attr);
	}

	if (!continuous_mode) {
		pthread_join(cpu_threads[0], &retval);
		/*
		 * The thread id is dead once joined; clear the slot so
		 * shutdown() does not signal or join it a second time.
		 */
		cpu_threads[0] = 0;
		shutdown((long)retval, "perf_test_thread()", __LINE__);
	} else {
		while (1)
			sleep(1);
	}
	shutdown(0, "", 0);
}

View file

@ -0,0 +1,25 @@
# Makefile for net selftests
# Compiler, optionally prefixed for cross builds
CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -O2 -g
# Extra header search path (presumably the kernel tree's exported headers)
CFLAGS += -I../../../../usr/include/
# Test programs, each built from the .c file of the same name
NET_PROGS = socket psock_fanout psock_tpacket
all: $(NET_PROGS)
%: %.c
$(CC) $(CFLAGS) -o $@ $^
# Runs both test scripts (a failure is only reported), then checks that the
# test_bpf module can be loaded; only that last check fails the target
run_tests: all
@/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
@/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
@if /sbin/modprobe test_bpf ; then \
/sbin/rmmod test_bpf; \
echo "test_bpf: ok"; \
else \
echo "test_bpf: [FAIL]"; \
exit 1; \
fi
# Removes the built test programs
clean:
$(RM) $(NET_PROGS)

View file

@ -0,0 +1,312 @@
/*
* Copyright 2013 Google Inc.
* Author: Willem de Bruijn (willemb@google.com)
*
* A basic test of packet socket fanout behavior.
*
* Control:
* - create fanout fails as expected with illegal flag combinations
* - join fanout fails as expected with diverging types or flags
*
* Datapath:
* Open a pair of packet sockets and a pair of INET sockets, send a known
* number of packets across the two INET sockets and count the number of
* packets enqueued onto the two packet sockets.
*
* The test currently runs for
* - PACKET_FANOUT_HASH
* - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
* - PACKET_FANOUT_LB
* - PACKET_FANOUT_CPU
* - PACKET_FANOUT_ROLLOVER
*
* Todo:
* - functionality: PACKET_FANOUT_FLAG_DEFRAG
*
* License (GPLv2):
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#define _GNU_SOURCE /* for sched_setaffinity */
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/filter.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "psock_lib.h"
#define RING_NUM_FRAMES 20
/*
 * Open a packet socket and join fanout group 0 in the given mode.
 *
 * @typeflags:   fanout type, optionally OR-ed with PACKET_FANOUT_FLAG_* bits;
 *               it is placed in the upper 16 bits of the option value
 * @num_packets: not used by this function
 *
 * Returns the socket with the UDP test filter attached, or -1 if the
 * kernel rejects the PACKET_FANOUT request (the socket is closed again).
 * Failure to create or close the socket terminates the program.
 */
static int sock_fanout_open(uint16_t typeflags, int num_packets)
{
	unsigned int val;
	int fd;

	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
	if (fd < 0) {
		perror("socket packet");
		exit(1);
	}

	/*
	 * fanout group ID is always 0: tests whether old groups are deleted.
	 *
	 * The shift is done on an unsigned value: flag bits such as
	 * PACKET_FANOUT_FLAG_DEFRAG occupy bit 15 of typeflags, and shifting
	 * that into the sign bit of an int is undefined behaviour.
	 */
	val = (unsigned int) typeflags << 16;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
		if (close(fd)) {
			perror("close packet");
			exit(1);
		}
		return -1;
	}

	pair_udp_setfilter(fd);
	return fd;
}
/*
 * Configure a TPACKET_V2 receive ring on @fd and map it into the process.
 *
 * The ring consists of RING_NUM_FRAMES blocks of one page each, with one
 * page-sized frame per block.
 *
 * Returns the start of the mapping. Any failure terminates the program.
 */
static char *sock_fanout_open_ring(int fd)
{
	struct tpacket_req req = {
		.tp_block_size = getpagesize(),
		.tp_frame_size = getpagesize(),
		.tp_block_nr   = RING_NUM_FRAMES,
		.tp_frame_nr   = RING_NUM_FRAMES,
	};
	char *ring;
	int val = TPACKET_V2;

	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
		       sizeof(val))) {
		perror("packetsock ring setsockopt version");
		exit(1);
	}
	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
		       sizeof(req))) {
		perror("packetsock ring setsockopt");
		exit(1);
	}

	ring = mmap(0, req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	/* mmap() signals failure with MAP_FAILED, not with a NULL pointer */
	if (ring == MAP_FAILED) {
		fprintf(stderr, "packetsock ring mmap\n");
		exit(1);
	}

	return ring;
}
/*
 * Count how many consecutive frames, starting at the first one, carry the
 * TP_STATUS_USER bit in their status word.
 *
 * @fd:   not used by this function
 * @ring: start of a mapped ring with one frame per page
 *
 * Returns a value between 0 and RING_NUM_FRAMES.
 */
static int sock_fanout_read_ring(int fd, void *ring)
{
	const char *base = ring;
	const int frame_size = getpagesize();
	int filled;

	for (filled = 0; filled < RING_NUM_FRAMES; filled++) {
		const struct tpacket2_hdr *hdr =
			(const void *) (base + filled * frame_size);

		if (!(hdr->tp_status & TP_STATUS_USER))
			break;
	}

	return filled;
}
/*
 * Read the fill level of both rings and compare it with @expect.
 *
 * The two counts may match the two expected values in either order.
 * Returns 0 on a match and 1 otherwise.
 */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	int got[2];
	int i;

	for (i = 0; i < 2; i++)
		got[i] = sock_fanout_read_ring(fds[i], rings[i]);

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		got[0], got[1], expect[0], expect[1]);

	if ((got[0] == expect[0] && got[1] == expect[1]) ||
	    (got[0] == expect[1] && got[1] == expect[0]))
		return 0;

	fprintf(stderr, "ERROR: incorrect queue lengths\n");
	return 1;
}
/*
 * Control test: asking for rollover both as the fanout mode and as a flag
 * is an illegal combination, so opening such a socket has to fail.
 */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}
/*
 * Control test: once a PACKET_FANOUT_HASH group exists, sockets asking for
 * different flags or a different mode must not be able to join it, while a
 * second socket with identical settings must.
 */
static void test_control_group(void)
{
	static const struct {
		uint16_t typeflags;
		const char *errmsg;
	} bad_joins[] = {
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG,
		  "ERROR: joined group with wrong flag defrag\n" },
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
		  "ERROR: joined group with wrong flag ro\n" },
		{ PACKET_FANOUT_CPU,
		  "ERROR: joined group with wrong mode\n" },
	};
	int first, second;
	size_t i;

	fprintf(stderr, "test: control multiple sockets\n");

	first = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (first == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	/* every mismatching join attempt has to be rejected */
	for (i = 0; i < sizeof(bad_joins) / sizeof(bad_joins[0]); i++) {
		if (sock_fanout_open(bad_joins[i].typeflags, 10) != -1) {
			fputs(bad_joins[i].errmsg, stderr);
			exit(1);
		}
	}

	second = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (second == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	if (close(second) || close(first)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
/*
 * Run one datapath scenario for the fanout mode given in @typeflags.
 *
 * Two packet sockets are opened in that mode and given RX rings; two UDP
 * socket pairs are opened at PORT_BASE and PORT_BASE + @port_off. The ring
 * fill levels are then compared against @expect1 after the first batch of
 * packets and against @expect2 after a second batch.
 *
 * Returns 0 if both comparisons matched, non-zero otherwise. Setup and
 * teardown failures terminate the program.
 */
static int test_datapath(uint16_t typeflags, int port_off,
const int expect1[], const int expect2[])
{
const int expect0[] = { 0, 0 };
char *rings[2];
int fds[2], fds_udp[2][2], ret;
fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
fds[0] = sock_fanout_open(typeflags, 20);
fds[1] = sock_fanout_open(typeflags, 20);
if (fds[0] == -1 || fds[1] == -1) {
fprintf(stderr, "ERROR: failed open\n");
exit(1);
}
rings[0] = sock_fanout_open_ring(fds[0]);
rings[1] = sock_fanout_open_ring(fds[1]);
pair_udp_open(fds_udp[0], PORT_BASE);
pair_udp_open(fds_udp[1], PORT_BASE + port_off);
/* Both rings should still be empty; this result is only logged and
 * does not contribute to the return value. */
sock_fanout_read(fds, rings, expect0);
/* Send data, but not enough to overflow a queue */
pair_udp_send(fds_udp[0], 15);
pair_udp_send(fds_udp[1], 5);
ret = sock_fanout_read(fds, rings, expect1);
/* Send more data, overflow the queue */
pair_udp_send(fds_udp[0], 15);
/* TODO: ensure consistent order between expect1 and expect2 */
ret |= sock_fanout_read(fds, rings, expect2);
/* Tear down in reverse order of creation. */
if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
fprintf(stderr, "close rings\n");
exit(1);
}
if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
close(fds[1]) || close(fds[0])) {
fprintf(stderr, "close datapath\n");
exit(1);
}
return ret;
}
/*
 * Pin the calling process to the single CPU @cpuid.
 *
 * Returns 0 on success and 1 if sched_setaffinity() fails with EINVAL.
 * Any other failure terminates the program.
 */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpuid, &mask);

	if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
		return 0;

	if (errno != EINVAL) {
		fprintf(stderr, "setaffinity %d\n", cpuid);
		exit(1);
	}

	return 1;
}
/*
 * Run the control tests, then the datapath test once per fanout mode.
 *
 * Each expect_* table holds the expected ring fill levels as
 * { after first batch, after second batch }.
 *
 * Returns 0 if every datapath test matched, 1 otherwise.
 */
int main(int argc, char **argv)
{
const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } };
const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
int port_off = 2, tries = 5, ret;
test_control_single();
test_control_group();
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
/* Retry the HASH test with the next port offset, at most 'tries' more
 * times; the offset that finally works is reused by the tests below. */
while (ret && tries--) {
fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
expect_hash[0], expect_hash[1]);
}
ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
port_off, expect_hash_rb[0], expect_hash_rb[1]);
ret |= test_datapath(PACKET_FANOUT_LB,
port_off, expect_lb[0], expect_lb[1]);
ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
port_off, expect_rb[0], expect_rb[1]);
/* The CPU fanout tests run pinned; the result of pinning to CPU 0 is
 * not checked here. */
set_cpuaffinity(0);
ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
expect_cpu0[0], expect_cpu0[1]);
/* The CPU 1 variant only runs if pinning to CPU 1 succeeded. */
if (!set_cpuaffinity(1))
/* TODO: test that choice alternates with previous */
ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
expect_cpu1[0], expect_cpu1[1]);
if (ret)
return 1;
printf("OK. All tests passed\n");
return 0;
}

View file

@ -0,0 +1,127 @@
/*
* Copyright 2013 Google Inc.
* Author: Willem de Bruijn <willemb@google.com>
* Daniel Borkmann <dborkman@redhat.com>
*
* License (GPLv2):
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef PSOCK_LIB_H
#define PSOCK_LIB_H
#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <arpa/inet.h>
#include <unistd.h>
#define DATA_LEN 100
#define DATA_CHAR 'a'
#define PORT_BASE 8000
#ifndef __maybe_unused
# define __maybe_unused __attribute__ ((__unused__))
#endif
/*
 * Attach a socket filter to @fd that only lets the test traffic through.
 *
 * The program accepts a packet when its length is at least DATA_LEN and
 * the bytes at filter offsets 80 and 81 both equal DATA_CHAR; every failed
 * check jumps to the final "no match" instruction, which returns 0.
 * A failing setsockopt() terminates the program.
 */
static __maybe_unused void pair_udp_setfilter(int fd)
{
/* Each entry is { opcode, jump-if-true, jump-if-false, operand }. */
struct sock_filter bpf_filter[] = {
{ 0x80, 0, 0, 0x00000000 }, /* LD pktlen */
{ 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
{ 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */
{ 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
{ 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */
{ 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
{ 0x06, 0, 0, 0x00000060 }, /* RET match */
{ 0x06, 0, 0, 0x00000000 }, /* RET no match */
};
struct sock_fprog bpf_prog;
bpf_prog.filter = bpf_filter;
/* Program length is the number of instructions, not bytes. */
bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
sizeof(bpf_prog))) {
perror("setsockopt SO_ATTACH_FILTER");
exit(1);
}
}
/*
 * Create a connected pair of UDP sockets on the loopback address.
 *
 * fds[0] is bound to @port and connected to @port + 1; fds[1] is bound to
 * @port + 1. Any failure terminates the program.
 */
static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
{
	struct sockaddr_in saddr = {
		.sin_family = AF_INET,
		.sin_port = htons(port),
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	struct sockaddr_in daddr = {
		.sin_family = AF_INET,
		.sin_port = htons(port + 1),
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};

	fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
	fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
	if (fds[0] == -1 || fds[1] == -1) {
		fprintf(stderr, "ERROR: socket dgram\n");
		exit(1);
	}

	/* must bind both to get consistent hash result */
	if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
		perror("bind");
		exit(1);
	}
	if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
		perror("bind");
		exit(1);
	}

	if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
		perror("connect");
		exit(1);
	}
}
/*
 * Send @num datagrams of DATA_LEN bytes, each filled with DATA_CHAR, from
 * fds[0] and read every one back on fds[1], checking length and content.
 *
 * Any short transfer or content mismatch terminates the program; the
 * message reports how many datagrams were still outstanding.
 */
static __maybe_unused void pair_udp_send(int fds[], int num)
{
	char tx[DATA_LEN], rx[DATA_LEN];
	ssize_t n;

	memset(tx, DATA_CHAR, sizeof(tx));

	while (num--) {
		/* Should really handle EINTR and EAGAIN */
		n = write(fds[0], tx, sizeof(tx));
		if (n != (ssize_t) sizeof(tx)) {
			fprintf(stderr, "ERROR: send failed left=%d\n", num);
			exit(1);
		}

		n = read(fds[1], rx, sizeof(rx));
		if (n != (ssize_t) sizeof(rx)) {
			fprintf(stderr, "ERROR: recv failed left=%d\n", num);
			exit(1);
		}

		if (memcmp(tx, rx, sizeof(tx)) != 0) {
			fprintf(stderr, "ERROR: data failed left=%d\n", num);
			exit(1);
		}
	}
}
/* Close both sockets of a pair; close() errors are ignored. */
static __maybe_unused void pair_udp_close(int fds[])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fds[i]);
}
#endif /* PSOCK_LIB_H */

View file

@ -0,0 +1,805 @@
/*
* Copyright 2013 Red Hat, Inc.
* Author: Daniel Borkmann <dborkman@redhat.com>
* Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
*
* A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
*
* Control:
* Test the setup of the TPACKET socket with different patterns that are
* known to fail (TODO) resp. succeed (OK).
*
* Datapath:
* Open a pair of packet sockets, send (or receive) an a priori known
* packet pattern across the sockets and check that it was received (or
* sent) correctly. Fanout in combination with RX_RING is currently not
* tested here.
*
* The test currently runs for
* - TPACKET_V1: RX_RING, TX_RING
* - TPACKET_V2: RX_RING, TX_RING
* - TPACKET_V3: RX_RING
*
* License (GPLv2):
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>
#include <linux/filter.h>
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
#include <bits/wordsize.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <net/if.h>
#include <inttypes.h>
#include <poll.h>
#include "psock_lib.h"
#ifndef bug_on
# define bug_on(cond) assert(!(cond))
#endif
#ifndef __aligned_tpacket
# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT)))
#endif
#ifndef __align_tpacket
# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x))))
#endif
#define NUM_PACKETS 100
#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1))
/* Book-keeping for one mapped packet ring under test. */
struct ring {
/* one entry per frame (V1/V2) or per block (V3) inside the mapping */
struct iovec *rd;
/* start of the mmap()ed ring */
uint8_t *mm_space;
/* mapping size in bytes, and size in bytes of the rd array */
size_t mm_len, rd_len;
/* address the packet socket is bound to */
struct sockaddr_ll ll;
/* version-specific routine that drives the ring */
void (*walk)(int sock, struct ring *ring);
/* type: PACKET_RX_RING or PACKET_TX_RING; rd_num: number of rd entries;
 * flen: bytes per rd entry; version: TPACKET_V1/V2/V3 */
int type, rd_num, flen, version;
/* ring request handed to setsockopt(); req3 is used for TPACKET_V3 */
union {
struct tpacket_req req;
struct tpacket_req3 req3;
};
};
/*
 * Header found at the start of each TPACKET_V3 block, as accessed by this
 * test (presumably mirrors the kernel's block descriptor layout -- confirm
 * against linux/if_packet.h).
 */
struct block_desc {
uint32_t version;
uint32_t offset_to_priv;
/* per-block status, packet count, length and sequence number */
struct tpacket_hdr_v1 h1;
};
/*
 * Typed views onto one ring frame: the same address can be read as a
 * TPACKET_V1 frame, a TPACKET_V2 frame, or an untyped pointer.
 */
union frame_map {
/* V1 frame: header followed by the link-layer address */
struct {
struct tpacket_hdr tp_h __aligned_tpacket;
struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
} *v1;
/* V2 frame: header followed by the link-layer address */
struct {
struct tpacket2_hdr tp_h __aligned_tpacket;
struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
} *v2;
/* raw frame start, used for byte-offset arithmetic */
void *raw;
};
static unsigned int total_packets, total_bytes;
/*
 * Open a raw packet socket for all protocols and select TPACKET version
 * @ver on it. Returns the socket; any failure terminates the program.
 */
static int pfsocket(int ver)
{
	int sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (sock == -1) {
		perror("socket");
		exit(1);
	}

	if (setsockopt(sock, SOL_PACKET, PACKET_VERSION,
		       &ver, sizeof(ver)) == -1) {
		perror("setsockopt");
		exit(1);
	}

	return sock;
}
/* Print one progress dot whenever total_packets is a multiple of ten. */
static void status_bar_update(void)
{
	if (total_packets % 10 != 0)
		return;

	fprintf(stderr, ".");
	fflush(stderr);
}
/*
 * Sanity-check a captured frame: it must be large enough to hold an
 * Ethernet header and its protocol field must be ETH_P_IP.
 *
 * @pay: start of the Ethernet header
 * @len: number of captured bytes
 *
 * Returns normally if the frame looks right and terminates the program
 * with a diagnostic otherwise.
 */
static void test_payload(void *pay, size_t len)
{
	const struct ethhdr *eth = pay;
	unsigned int proto;

	if (len < sizeof(*eth)) {
		fprintf(stderr, "test_payload: packet too "
			"small: %zu bytes!\n", len);
		exit(1);
	}

	proto = ntohs(eth->h_proto);
	if (proto != ETH_P_IP) {
		fprintf(stderr, "test_payload: wrong ethernet "
			"type: 0x%x!\n", proto);
		exit(1);
	}
}
/*
 * Build a test frame in @pay: Ethernet header, IPv4 header, then DATA_LEN
 * bytes of DATA_CHAR. The reported length *@len is DATA_LEN + 42.
 *
 * The IPv4 header is first filled with rand() bytes and only selected
 * fields are then set, so the frame is deliberately not a fully valid
 * packet; it only has to pass the test's BPF filter.
 */
static void create_payload(void *pay, size_t *len)
{
	uint8_t *frame = pay;
	struct ethhdr *eth = pay;
	uint8_t *ip_bytes = frame + sizeof(*eth);
	struct iphdr *ip = (void *) ip_bytes;
	uint8_t *data = ip_bytes + sizeof(*ip);
	size_t off;

	*len = DATA_LEN + 42;

	/* all-ones destination and source MAC addresses */
	memset(frame, 0xff, ETH_ALEN * 2);
	eth->h_proto = htons(ETH_P_IP);

	for (off = 0; off < sizeof(*ip); ++off)
		ip_bytes[off] = (uint8_t) rand();

	ip->ihl = 5;
	ip->version = 4;
	ip->protocol = 0x11;
	ip->frag_off = 0;
	ip->ttl = 64;
	ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
	ip->saddr = htonl(INADDR_LOOPBACK);
	ip->daddr = htonl(INADDR_LOOPBACK);

	memset(data, DATA_CHAR, DATA_LEN);
}
/* Returns 1 if the V1 frame has TP_STATUS_USER set in its status, else 0. */
static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
{
	return (hdr->tp_status & TP_STATUS_USER) != 0;
}
/*
 * Mark a V1 RX frame as processed: store TP_STATUS_KERNEL in its status
 * word, then issue a full memory barrier.
 */
static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
{
hdr->tp_status = TP_STATUS_KERNEL;
__sync_synchronize();
}
/* Returns 1 if the V2 frame has TP_STATUS_USER set in its status, else 0. */
static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
{
	return (hdr->tp_status & TP_STATUS_USER) != 0;
}
/*
 * Mark a V2 RX frame as processed: store TP_STATUS_KERNEL in its status
 * word, then issue a full memory barrier.
 */
static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
{
hdr->tp_status = TP_STATUS_KERNEL;
__sync_synchronize();
}
/*
 * Version dispatcher for the RX "frame is ready" check. @base points at a
 * frame header of the given TPACKET @version; any version other than V1
 * or V2 trips bug_on().
 */
static inline int __v1_v2_rx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_rx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_rx_kernel_ready(base);

	bug_on(1);
	return 0;
}
/*
 * Version dispatcher for releasing an RX frame after it was processed.
 * Versions other than V1 and V2 are silently ignored.
 */
static inline void __v1_v2_rx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_rx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_rx_user_ready(base);
}
/*
 * RX test for TPACKET_V1/V2: send NUM_PACKETS UDP datagrams over loopback
 * and consume frames from the ring until NUM_PACKETS * 2 frames have been
 * seen (presumably each datagram is captured twice on loopback -- confirm).
 * Every frame is checked with test_payload() and then released.
 * Terminates the program if the final count is wrong.
 */
static void walk_v1_v2_rx(int sock, struct ring *ring)
{
struct pollfd pfd;
int udp_sock[2];
union frame_map ppd;
unsigned int frame_num = 0;
bug_on(ring->type != PACKET_RX_RING);
pair_udp_open(udp_sock, PORT_BASE);
pair_udp_setfilter(sock);
memset(&pfd, 0, sizeof(pfd));
pfd.fd = sock;
pfd.events = POLLIN | POLLERR;
pfd.revents = 0;
pair_udp_send(udp_sock, NUM_PACKETS);
while (total_packets < NUM_PACKETS * 2) {
/* Drain every frame that is currently marked ready. */
while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
ring->version)) {
ppd.raw = ring->rd[frame_num].iov_base;
switch (ring->version) {
case TPACKET_V1:
/* tp_mac is the offset of the link-layer header in the frame */
test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
ppd.v1->tp_h.tp_snaplen);
total_bytes += ppd.v1->tp_h.tp_snaplen;
break;
case TPACKET_V2:
test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
ppd.v2->tp_h.tp_snaplen);
total_bytes += ppd.v2->tp_h.tp_snaplen;
break;
}
status_bar_update();
total_packets++;
__v1_v2_rx_user_ready(ppd.raw, ring->version);
/* frames are consumed in ring order, wrapping at rd_num */
frame_num = (frame_num + 1) % ring->rd_num;
}
/* Nothing ready: wait up to 1 ms; the poll result is not inspected. */
poll(&pfd, 1, 1);
}
pair_udp_close(udp_sock);
if (total_packets != 2 * NUM_PACKETS) {
fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
ring->version, total_packets, NUM_PACKETS);
exit(1);
}
/* total_bytes is halved to match the NUM_PACKETS figure printed. */
fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
}
/*
 * Returns 1 if the V1 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set in its status, else 0.
 */
static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
/*
 * Mark a filled V1 TX frame: store TP_STATUS_SEND_REQUEST in its status
 * word, then issue a full memory barrier.
 */
static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
{
hdr->tp_status = TP_STATUS_SEND_REQUEST;
__sync_synchronize();
}
/*
 * Returns 1 if the V2 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set in its status, else 0.
 */
static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const unsigned int busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
/*
 * Mark a filled V2 TX frame: store TP_STATUS_SEND_REQUEST in its status
 * word, then issue a full memory barrier.
 */
static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
{
hdr->tp_status = TP_STATUS_SEND_REQUEST;
__sync_synchronize();
}
/*
 * Version dispatcher for the TX "frame may be filled" check. @base points
 * at a frame header of the given TPACKET @version; any version other than
 * V1 or V2 trips bug_on().
 */
static inline int __v1_v2_tx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_tx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_tx_kernel_ready(base);

	bug_on(1);
	return 0;
}
/*
 * Version dispatcher for marking a filled TX frame as a send request.
 * Versions other than V1 and V2 are silently ignored.
 */
static inline void __v1_v2_tx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_tx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_tx_user_ready(base);
}
/*
 * Enable the PACKET_LOSS socket option (value 1) on @sock.
 * A failing setsockopt() terminates the program.
 */
static void __v1_v2_set_packet_loss_discard(int sock)
{
	int discard = 1;

	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
		       sizeof(discard)) == -1) {
		perror("setsockopt");
		exit(1);
	}
}
/*
 * TX test for TPACKET_V1/V2.
 *
 * Fills NUM_PACKETS frames of the TX ring with the payload produced by
 * create_payload(), kicks transmission with an empty sendto(), and then
 * reads the frames back from a second raw packet socket bound to "lo",
 * validating each one with test_payload().
 *
 * @sock: packet socket owning the TX ring
 * @ring: ring description; must be a PACKET_TX_RING with at least
 *        NUM_PACKETS frames
 *
 * Terminates the program on any socket error or if fewer than NUM_PACKETS
 * frames were received.
 */
static void walk_v1_v2_tx(int sock, struct ring *ring)
{
	struct pollfd pfd;
	int rcv_sock, ret;
	size_t packet_len;
	union frame_map ppd;
	char packet[1024];
	unsigned int frame_num = 0, got = 0;
	struct sockaddr_ll ll = {
		.sll_family = PF_PACKET,
		.sll_halen = ETH_ALEN,
	};

	bug_on(ring->type != PACKET_TX_RING);
	bug_on(ring->rd_num < NUM_PACKETS);

	/* Receiver: raw packet socket on "lo" with the test filter attached */
	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (rcv_sock == -1) {
		perror("socket");
		exit(1);
	}

	pair_udp_setfilter(rcv_sock);

	ll.sll_ifindex = if_nametoindex("lo");
	ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
	if (ret == -1) {
		perror("bind");
		exit(1);
	}

	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = sock;
	pfd.events = POLLOUT | POLLERR;
	pfd.revents = 0;

	/* total_packets counts down while the ring is being filled ... */
	total_packets = NUM_PACKETS;

	create_payload(packet, &packet_len);

	while (total_packets > 0) {
		while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
					       ring->version) &&
		       total_packets > 0) {
			ppd.raw = ring->rd[frame_num].iov_base;

			/*
			 * The payload is copied to the frame at offset
			 * TPACKET(2)_HDRLEN - sizeof(struct sockaddr_ll).
			 */
			switch (ring->version) {
			case TPACKET_V1:
				ppd.v1->tp_h.tp_snaplen = packet_len;
				ppd.v1->tp_h.tp_len = packet_len;

				memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
				       sizeof(struct sockaddr_ll), packet,
				       packet_len);
				total_bytes += ppd.v1->tp_h.tp_snaplen;
				break;

			case TPACKET_V2:
				ppd.v2->tp_h.tp_snaplen = packet_len;
				ppd.v2->tp_h.tp_len = packet_len;

				memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
				       sizeof(struct sockaddr_ll), packet,
				       packet_len);
				total_bytes += ppd.v2->tp_h.tp_snaplen;
				break;
			}

			status_bar_update();
			total_packets--;

			__v1_v2_tx_user_ready(ppd.raw, ring->version);

			frame_num = (frame_num + 1) % ring->rd_num;
		}

		/* No free frame: wait up to 1 ms before looking again */
		poll(&pfd, 1, 1);
	}

	bug_on(total_packets != 0);

	/* An empty sendto() on the ring socket starts the transmission */
	ret = sendto(sock, NULL, 0, 0, NULL, 0);
	if (ret == -1) {
		perror("sendto");
		exit(1);
	}

	/* ... and counts up again while the frames are read back */
	while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
			       0, NULL, NULL)) > 0 &&
	       total_packets < NUM_PACKETS) {
		got += ret;
		test_payload(packet, ret);

		status_bar_update();
		total_packets++;
	}

	close(rcv_sock);

	if (total_packets != NUM_PACKETS) {
		/* name the TX walker here, not the RX one */
		fprintf(stderr, "walk_v%d_tx: received %u out of %u pkts\n",
			ring->version, total_packets, NUM_PACKETS);
		exit(1);
	}

	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
}
/* Ring walker for TPACKET_V1/V2: RX rings go to the RX test, all others
 * to the TX test. */
static void walk_v1_v2(int sock, struct ring *ring)
{
	if (ring->type != PACKET_RX_RING) {
		walk_v1_v2_tx(sock, ring);
		return;
	}

	walk_v1_v2_rx(sock, ring);
}
/* Sequence number of the most recently checked V3 block (0 = none yet). */
static uint64_t __v3_prev_block_seq_num = 0;

/*
 * Verify that @pbd carries the sequence number that directly follows the
 * previously checked block, then remember it. A gap terminates the program.
 */
void __v3_test_block_seq_num(struct block_desc *pbd)
{
	const uint64_t expected = __v3_prev_block_seq_num + 1;
	const uint64_t actual = pbd->h1.seq_num;

	if (expected != actual) {
		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
			__v3_prev_block_seq_num, expected, actual);
		exit(1);
	}

	__v3_prev_block_seq_num = actual;
}
/*
 * Check that a non-empty block reports a length equal to @bytes.
 * Empty blocks (num_pkts == 0) are not checked. A mismatch terminates the
 * program.
 */
static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
{
	if (pbd->h1.num_pkts == 0 || bytes == pbd->h1.blk_len)
		return;

	fprintf(stderr, "\nblock:%u with %upackets, expected "
		"len:%u != actual len:%u\n", block_num,
		pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
	exit(1);
}
/*
 * Validate a block header before its packets are walked: the block must
 * have TP_STATUS_USER set and carry the next expected sequence number.
 */
static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
{
	if (pbd->h1.block_status & TP_STATUS_USER) {
		__v3_test_block_seq_num(pbd);
		return;
	}

	fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
	exit(1);
}
/*
 * Walk every packet in one TPACKET_V3 block.
 *
 * Validates the block header, checks each packet with test_payload(),
 * adds the packets to total_packets and their snap lengths to total_bytes,
 * and finally compares the accumulated padded size against the block
 * length reported in the header.
 */
static void __v3_walk_block(struct block_desc *pbd, const int block_num)
{
int num_pkts = pbd->h1.num_pkts, i;
/* the padded total starts with the 8-byte-aligned block header size */
unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
struct tpacket3_hdr *ppd;
__v3_test_block_header(pbd, block_num);
ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
pbd->h1.offset_to_first_pkt);
for (i = 0; i < num_pkts; ++i) {
bytes += ppd->tp_snaplen;
/* A zero tp_next_offset means no offset to a following packet is
 * given, so this packet's padded size is computed from its own
 * fields instead. */
if (ppd->tp_next_offset)
bytes_with_padding += ppd->tp_next_offset;
else
bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
status_bar_update();
total_packets++;
ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
__sync_synchronize();
}
__v3_test_block_len(pbd, bytes_with_padding, block_num);
total_bytes += bytes;
}
/*
 * Release a processed V3 block: store TP_STATUS_KERNEL in its block status,
 * then issue a full memory barrier.
 */
void __v3_flush_block(struct block_desc *pbd)
{
pbd->h1.block_status = TP_STATUS_KERNEL;
__sync_synchronize();
}
/*
 * RX test for TPACKET_V3: send NUM_PACKETS UDP datagrams over loopback and
 * consume the ring block by block until NUM_PACKETS * 2 packets have been
 * seen (presumably each datagram is captured twice on loopback -- confirm).
 * Terminates the program if the final count is wrong.
 */
static void walk_v3_rx(int sock, struct ring *ring)
{
unsigned int block_num = 0;
struct pollfd pfd;
struct block_desc *pbd;
int udp_sock[2];
bug_on(ring->type != PACKET_RX_RING);
pair_udp_open(udp_sock, PORT_BASE);
pair_udp_setfilter(sock);
memset(&pfd, 0, sizeof(pfd));
pfd.fd = sock;
pfd.events = POLLIN | POLLERR;
pfd.revents = 0;
pair_udp_send(udp_sock, NUM_PACKETS);
while (total_packets < NUM_PACKETS * 2) {
pbd = (struct block_desc *) ring->rd[block_num].iov_base;
/* Wait, in 1 ms poll steps, until the block has TP_STATUS_USER set. */
while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
poll(&pfd, 1, 1);
__v3_walk_block(pbd, block_num);
__v3_flush_block(pbd);
/* blocks are consumed in ring order, wrapping at rd_num */
block_num = (block_num + 1) % ring->rd_num;
}
pair_udp_close(udp_sock);
if (total_packets != 2 * NUM_PACKETS) {
fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
total_packets, NUM_PACKETS);
exit(1);
}
/* total_bytes is halved to match the NUM_PACKETS figure printed. */
fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
}
/* Ring walker for TPACKET_V3: only RX rings are handled; any other ring
 * type trips bug_on(). */
static void walk_v3(int sock, struct ring *ring)
{
	if (ring->type != PACKET_RX_RING) {
		bug_on(1);
		return;
	}

	walk_v3_rx(sock, ring);
}
/*
 * Fill in the TPACKET_V1/V2 ring request and the derived ring fields.
 *
 * Blocks are four pages, frames are TPACKET_ALIGNMENT << 7 bytes, and the
 * ring is walked frame by frame (rd_num = frame count, flen = frame size).
 */
static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
{
	struct tpacket_req *req = &ring->req;
	unsigned int frames_per_block;

	req->tp_block_size = getpagesize() << 2;
	req->tp_frame_size = TPACKET_ALIGNMENT << 7;
	req->tp_block_nr = blocks;

	frames_per_block = req->tp_block_size / req->tp_frame_size;
	req->tp_frame_nr = frames_per_block * req->tp_block_nr;

	ring->mm_len = req->tp_block_size * req->tp_block_nr;
	ring->walk = walk_v1_v2;
	ring->rd_num = req->tp_frame_nr;
	ring->flen = req->tp_frame_size;
}
/*
 * Fill in the TPACKET_V3 ring request and the derived ring fields.
 *
 * Geometry matches the V1/V2 setup, but the ring is walked block by block
 * (rd_num = block count, flen = block size). The request also sets a
 * retire timeout of 64, no private area, and the RX hash feature bit.
 */
static void __v3_fill(struct ring *ring, unsigned int blocks)
{
	struct tpacket_req3 *req3 = &ring->req3;
	unsigned int frames_per_block;

	req3->tp_retire_blk_tov = 64;
	req3->tp_sizeof_priv = 0;
	req3->tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;

	req3->tp_block_size = getpagesize() << 2;
	req3->tp_frame_size = TPACKET_ALIGNMENT << 7;
	req3->tp_block_nr = blocks;

	frames_per_block = req3->tp_block_size / req3->tp_frame_size;
	req3->tp_frame_nr = frames_per_block * req3->tp_block_nr;

	ring->mm_len = req3->tp_block_size * req3->tp_block_nr;
	ring->walk = walk_v3;
	ring->rd_num = req3->tp_block_nr;
	ring->flen = req3->tp_block_size;
}
/*
 * Request a ring of the given @version and @type (PACKET_RX_RING or
 * PACKET_TX_RING) on @sock and allocate the descriptor array for it.
 *
 * For V1/V2 TX rings the PACKET_LOSS option is enabled first. The global
 * packet and byte counters are reset. Any failure terminates the program.
 */
static void setup_ring(int sock, struct ring *ring, int version, int type)
{
	const unsigned int blocks = 256;
	const void *req = NULL;
	socklen_t req_len = 0;

	ring->type = type;
	ring->version = version;

	switch (version) {
	case TPACKET_V1:
	case TPACKET_V2:
		if (type == PACKET_TX_RING)
			__v1_v2_set_packet_loss_discard(sock);
		__v1_v2_fill(ring, blocks);
		req = &ring->req;
		req_len = sizeof(ring->req);
		break;

	case TPACKET_V3:
		__v3_fill(ring, blocks);
		req = &ring->req3;
		req_len = sizeof(ring->req3);
		break;
	}

	/* unknown versions issue no ring request at all */
	if (req != NULL &&
	    setsockopt(sock, SOL_PACKET, type, req, req_len) == -1) {
		perror("setsockopt");
		exit(1);
	}

	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
	ring->rd = malloc(ring->rd_len);
	if (ring->rd == NULL) {
		perror("malloc");
		exit(1);
	}

	total_packets = 0;
	total_bytes = 0;
}
/*
 * Map the ring of @sock and point each descriptor in ring->rd at its
 * slice of the mapping (entry i starts at i * flen and is flen bytes long).
 * A failing mmap() terminates the program.
 */
static void mmap_ring(int sock, struct ring *ring)
{
	const int map_flags = MAP_SHARED | MAP_LOCKED | MAP_POPULATE;
	int idx;

	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
			      map_flags, sock, 0);
	if (ring->mm_space == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	memset(ring->rd, 0, ring->rd_len);
	for (idx = 0; idx < ring->rd_num; ++idx) {
		struct iovec *slot = &ring->rd[idx];

		slot->iov_base = ring->mm_space + (idx * ring->flen);
		slot->iov_len = ring->flen;
	}
}
/*
 * Bind @sock to the "lo" interface for all protocols, recording the
 * address used in ring->ll. A failing bind() terminates the program.
 */
static void bind_ring(int sock, struct ring *ring)
{
	struct sockaddr_ll *ll = &ring->ll;

	ll->sll_family = PF_PACKET;
	ll->sll_protocol = htons(ETH_P_ALL);
	ll->sll_ifindex = if_nametoindex("lo");
	ll->sll_hatype = 0;
	ll->sll_pkttype = 0;
	ll->sll_halen = 0;

	if (bind(sock, (struct sockaddr *) ll, sizeof(*ll)) == -1) {
		perror("bind");
		exit(1);
	}
}
/* Run the version-specific walker stored in ring->walk. */
static void walk_ring(int sock, struct ring *ring)
{
	(*ring->walk)(sock, ring);
}
/*
 * Undo mmap_ring() and the allocation made by setup_ring(): unmap the ring
 * and free the descriptor array. @sock is not used.
 */
static void unmap_ring(int sock, struct ring *ring)
{
	uint8_t *mapping = ring->mm_space;
	size_t mapping_len = ring->mm_len;

	munmap(mapping, mapping_len);
	free(ring->rd);
}
/*
 * Estimate the kernel's address width from /proc/kallsyms.
 *
 * Counts the characters that precede the first whitespace in the file and
 * returns that count times four (presumably the first field is a symbol
 * address printed as hex digits, four bits each -- confirm).
 *
 * Failing to open or read the file terminates the program.
 */
static int test_kernel_bit_width(void)
{
	char in[512];
	int num = 0, fd;
	ssize_t ret, pos;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	ret = read(fd, in, sizeof(in));
	if (ret <= 0) {
		perror("read");
		exit(1);
	}

	close(fd);

	/*
	 * read() does not NUL-terminate the buffer, so the scan must stop at
	 * the number of bytes actually read. isspace() is only defined for
	 * values representable as unsigned char, hence the cast.
	 */
	for (pos = 0; pos < ret && !isspace((unsigned char) in[pos]); pos++)
		num++;

	return num * 4;
}
/* Returns the word size, in bits, this program was compiled for. */
static int test_user_bit_width(void)
{
	const int user_bits = __WORDSIZE;

	return user_bits;
}
/* Printable names for log output, indexed by the TPACKET_V* constant. */
static const char *tpacket_str[] = {
[TPACKET_V1] = "TPACKET_V1",
[TPACKET_V2] = "TPACKET_V2",
[TPACKET_V3] = "TPACKET_V3",
};
/*
 * Printable names for log output, indexed by the ring socket option
 * constant. Indices that are not listed are left NULL.
 */
static const char *type_str[] = {
[PACKET_RX_RING] = "PACKET_RX_RING",
[PACKET_TX_RING] = "PACKET_TX_RING",
};
/*
 * Run one ring test for the given TPACKET @version and ring @type.
 *
 * TPACKET_V1 is skipped when kernel and user space differ in bit width.
 * Always returns 0; failures inside the test terminate the program.
 */
static int test_tpacket(int version, int type)
{
	const char *version_name = tpacket_str[version];
	const char *type_name = type_str[type];
	struct ring ring;
	int sock;

	fprintf(stderr, "test: %s with %s ", version_name, type_name);
	fflush(stderr);

	if (version == TPACKET_V1 &&
	    test_kernel_bit_width() != test_user_bit_width()) {
		fprintf(stderr, "test: skip %s %s since user and kernel "
			"space have different bit width\n",
			version_name, type_name);
		return 0;
	}

	sock = pfsocket(version);
	memset(&ring, 0, sizeof(ring));

	/* ring life cycle: configure, map, bind, exercise, tear down */
	setup_ring(sock, &ring, version, type);
	mmap_ring(sock, &ring);
	bind_ring(sock, &ring);
	walk_ring(sock, &ring);
	unmap_ring(sock, &ring);
	close(sock);

	fprintf(stderr, "\n");
	return 0;
}
/*
 * Run every supported version/ring-type combination in a fixed order.
 * Returns 0 if all tests reported success, 1 otherwise.
 */
int main(void)
{
	static const struct {
		int version;
		int type;
	} cases[] = {
		{ TPACKET_V1, PACKET_RX_RING },
		{ TPACKET_V1, PACKET_TX_RING },
		{ TPACKET_V2, PACKET_RX_RING },
		{ TPACKET_V2, PACKET_TX_RING },
		{ TPACKET_V3, PACKET_RX_RING },
	};
	int failed = 0;
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		failed |= test_tpacket(cases[i].version, cases[i].type);

	if (failed)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}

View file

@ -0,0 +1,26 @@
#!/bin/sh
# Run the AF_PACKET selftests (psock_fanout and psock_tpacket).
#
# Exit status: 0 if every test passed, or if the script was skipped because
# it is not running as root; 1 if at least one test failed, so that callers
# can detect the failure.

if [ "$(id -u)" != 0 ]; then
	echo "must be run as root" >&2
	exit 0
fi

ret=0

# run_test NAME: run ./NAME, print the verdict and remember a failure.
run_test()
{
	echo "--------------------"
	echo "running $1 test"
	echo "--------------------"
	if "./$1"; then
		echo "[PASS]"
	else
		echo "[FAIL]"
		ret=1
	fi
}

run_test psock_fanout
run_test psock_tpacket

exit $ret

View file

@ -0,0 +1,12 @@
#!/bin/bash
# Run the socket() selftest.
#
# Exit status: 0 if the test passed, 1 if it failed, so that callers can
# detect the failure.

echo "--------------------"
echo "running socket test"
echo "--------------------"
if ./socket; then
	echo "[PASS]"
else
	echo "[FAIL]"
	exit 1
fi

View file

@ -0,0 +1,92 @@
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
/* One socket() call to try, together with the outcome it should have. */
struct socket_testcase {
/* arguments passed to socket() */
int domain;
int type;
int protocol;
/* 0 = valid file descriptor
* -foo = error foo
*/
int expect;
/* If non-zero, accept EAFNOSUPPORT to handle the case
* of the protocol not being configured into the kernel.
*/
int nosupport_ok;
};
/*
 * Test table; columns are { domain, type, protocol, expect, nosupport_ok }.
 * It covers an out-of-range address family plus matching and mismatching
 * type/protocol pairs for AF_INET.
 */
static struct socket_testcase tests[] = {
{ AF_MAX, 0, 0, -EAFNOSUPPORT, 0 },
{ AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 },
{ AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 },
{ AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 },
{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
};
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define ERR_STRING_SZ 64
/*
 * Try every entry of tests[] and compare the result of socket() with the
 * expected outcome.
 *
 * A failing socket() is accepted if the entry tolerates EAFNOSUPPORT and
 * that is the error seen, or if the error is exactly the expected one.
 * A successful socket() is accepted only if no error was expected.
 *
 * Stops at the first mismatch. Returns 0 if all entries behaved as
 * expected and -1 otherwise.
 */
static int run_tests(void)
{
	char err_string1[ERR_STRING_SZ];
	char err_string2[ERR_STRING_SZ];
	size_t i;
	int err = 0;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		struct socket_testcase *s = &tests[i];
		int fd, saved_errno;

		fd = socket(s->domain, s->type, s->protocol);
		if (fd < 0) {
			/* keep the error before any other call can change it */
			saved_errno = errno;

			if (s->nosupport_ok && saved_errno == EAFNOSUPPORT)
				continue;

			if (s->expect < 0 && saved_errno == -s->expect)
				continue;

			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
			strerror_r(saved_errno, err_string2, ERR_STRING_SZ);

			fprintf(stderr, "socket(%d, %d, %d) expected "
				"err (%s) got (%s)\n",
				s->domain, s->type, s->protocol,
				err_string1, err_string2);

			err = -1;
			break;
		}

		close(fd);

		if (s->expect < 0) {
			/*
			 * The call succeeded although an error was expected:
			 * report the expected error, since errno holds
			 * nothing meaningful after a successful call.
			 */
			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);

			fprintf(stderr, "socket(%d, %d, %d) expected "
				"err (%s) got success\n",
				s->domain, s->type, s->protocol,
				err_string1);

			err = -1;
			break;
		}
	}

	return err;
}
/* Program entry point: the exit status is the result of run_tests(). */
int main(void)
{
	return run_tests();
}

View file

@ -0,0 +1,39 @@
# Makefile for powerpc selftests
# ARCH can be overridden by the user for cross compiling
ARCH ?= $(shell uname -m)
# Normalise any "ppc*" machine name to "powerpc".
ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
# The sub-directories are only built when targeting powerpc; on any other
# architecture TARGETS stays empty and the rules below do nothing.
ifeq ($(ARCH),powerpc)
# Version string baked into the tests; "unknown" when git describe fails.
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
CC := $(CROSS_COMPILE)$(CC)
CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
# Hand the compiler settings down to the sub-makes.
export CC CFLAGS
TARGETS = pmu copyloops mm tm primitives
endif
all: $(TARGETS)
# Build each sub-directory; -k keeps going after errors inside it.
$(TARGETS):
$(MAKE) -k -C $@ all
# Run the tests of every sub-directory in turn.
run_tests: all
@for TARGET in $(TARGETS); do \
$(MAKE) -C $$TARGET run_tests; \
done;
# Clean every sub-directory and remove the tags file.
clean:
@for TARGET in $(TARGETS); do \
$(MAKE) -C $$TARGET clean; \
done;
rm -f tags
# Build a ctags index over all .c and .h files below this directory.
tags:
find . -name '*.c' -o -name '*.h' | xargs ctags
.PHONY: all run_tests clean tags $(TARGETS)

View file

@ -0,0 +1,29 @@
# The loops are all 64-bit code
CFLAGS += -m64
# Search this directory for headers.
CFLAGS += -I$(CURDIR)
CFLAGS += -D SELFTEST
# Use our CFLAGS for the implicit .S rule
ASFLAGS = $(CFLAGS)
# One test program per copy routine under test.
PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
# Sources linked into every test program.
EXTRA_SOURCES := validate.c ../harness.c
all: $(PROGS)
# Each program is compiled with COPY_LOOP set to the name of the routine it
# exercises.
copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy
memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
$(PROGS): $(EXTRA_SOURCES)
# Run every program; the leading "-" makes make ignore failures of this
# recipe.
run_tests: all
@-for PROG in $(PROGS); do \
./$$PROG; \
done;
clean:
rm -f $(PROGS) *.o
.PHONY: all run_tests clean

View file

@ -0,0 +1,89 @@
/*
 * Stand-in definitions for the copy-loop assembly sources in this directory.
 * Register-name and helper macros are mapped onto plain values and a few
 * stub routines are provided; presumably this is what lets the linked
 * arch/powerpc/lib .S files assemble as part of the selftest (confirm
 * against the build).
 */
#include <ppc-asm.h>
#define CONFIG_ALTIVEC
#define r1 1
/* Vector register names expand to their bare register numbers */
#define vr0 0
#define vr1 1
#define vr2 2
#define vr3 3
#define vr4 4
#define vr5 5
#define vr6 6
#define vr7 7
#define vr8 8
#define vr9 9
#define vr10 10
#define vr11 11
#define vr12 12
#define vr13 13
#define vr14 14
#define vr15 15
#define vr16 16
#define vr17 17
#define vr18 18
#define vr19 19
#define vr20 20
#define vr21 21
#define vr22 22
#define vr23 23
#define vr24 24
#define vr25 25
#define vr26 26
#define vr27 27
#define vr28 28
#define vr29 29
#define vr30 30
#define vr31 31
/* Upper-case GPR names are aliases for the lower-case ones */
#define R14 r14
#define R15 r15
#define R16 r16
#define R17 r17
#define R18 r18
#define R19 r19
#define R20 r20
#define R21 r21
#define R22 r22
#define R29 r29
#define R30 r30
#define R31 r31
#define STACKFRAMESIZE 256
/* Maps register number i to an offset: 112 for i = 14, plus 8 per register above that */
#define STK_REG(i) (112 + ((i)-14)*8)
/* Routines defined with _GLOBAL get a test_ prefix on their symbol name */
#define _GLOBAL(A) FUNC_START(test_ ## A)
#define _GLOBAL_TOC(A) _GLOBAL(A)
#define PPC_MTOCRF(A, B) mtocrf A, B
/* Stub: returns 1 in r3 */
FUNC_START(enter_vmx_usercopy)
li r3,1
blr
/* Stub: returns 0 in r3 */
FUNC_START(exit_vmx_usercopy)
li r3,0
blr
/* Stub: returns 1 in r3 */
FUNC_START(enter_vmx_copy)
li r3,1
blr
/* The remaining stubs return immediately without touching r3 */
FUNC_START(exit_vmx_copy)
blr
FUNC_START(memcpy_power7)
blr
FUNC_START(__copy_tofrom_user_power7)
blr
FUNC_START(__copy_tofrom_user_base)
blr
/* The feature-section markers expand to nothing here */
#define BEGIN_FTR_SECTION
#define FTR_SECTION_ELSE
#define ALT_FTR_SECTION_END_IFCLR(x)
#define ALT_FTR_SECTION_END(x, y)
#define END_FTR_SECTION_IFCLR(x)

View file

@ -0,0 +1 @@
../../../../../arch/powerpc/lib/copyuser_64.S

View file

@ -0,0 +1 @@
../../../../../arch/powerpc/lib/copyuser_power7.S

View file

@ -0,0 +1 @@
../../../../../arch/powerpc/lib/memcpy_64.S

View file

@ -0,0 +1 @@
../../../../../arch/powerpc/lib/memcpy_power7.S

View file

@ -0,0 +1,99 @@
#include <malloc.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include "../utils.h"
#define MAX_LEN 8192
#define MAX_OFFSET 16
#define MIN_REDZONE 128
#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
#define POISON 0xa5
unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
/*
 * Run COPY_LOOP once and validate the result.
 *
 * Both buffers are filled with POISON, @len bytes of @fill are placed in the
 * source at MIN_REDZONE + @src_off, and COPY_LOOP copies them to the
 * destination at MIN_REDZONE + @dst_off. The call aborts the process if:
 *  - COPY_LOOP returns something other than 0 or the destination pointer,
 *  - the copied bytes differ from the source, or
 *  - any destination byte before or after the copied range no longer
 *    matches @redzone (a BUFLEN-sized buffer of POISON bytes).
 */
static void do_one(char *src, char *dst, unsigned long src_off,
		   unsigned long dst_off, unsigned long len, void *redzone,
		   void *fill)
{
	char *srcp, *dstp;
	unsigned long ret;
	unsigned long i;

	srcp = src + MIN_REDZONE + src_off;
	dstp = dst + MIN_REDZONE + dst_off;

	memset(src, POISON, BUFLEN);
	memset(dst, POISON, BUFLEN);
	memcpy(srcp, fill, len);

	ret = COPY_LOOP(dstp, srcp, len);
	if (ret && ret != (unsigned long)dstp) {
		/* len and ret are unsigned long, so print them with %lu */
		printf("(%p,%p,%lu) returned %lu\n",
		       (void *)dstp, (void *)srcp, len, ret);
		abort();
	}

	if (memcmp(dstp, srcp, len)) {
		printf("(%p,%p,%lu) miscompare\n",
		       (void *)dstp, (void *)srcp, len);
		/*
		 * Go through unsigned char so that a byte >= 0x80 prints as
		 * two hex digits even where plain char is signed.
		 */
		printf("src: ");
		for (i = 0; i < len; i++)
			printf("%02x ", (unsigned char)srcp[i]);
		printf("\ndst: ");
		for (i = 0; i < len; i++)
			printf("%02x ", (unsigned char)dstp[i]);
		printf("\n");
		abort();
	}

	/* Everything in front of the copied range must still be POISON */
	if (memcmp(dst, redzone, dstp - dst)) {
		printf("(%p,%p,%lu) redzone before corrupted\n",
		       (void *)dstp, (void *)srcp, len);
		abort();
	}

	/* ... and so must everything behind it, up to the end of the buffer */
	if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
		printf("(%p,%p,%lu) redzone after corrupted\n",
		       (void *)dstp, (void *)srcp, len);
		abort();
	}
}
/*
 * Exercise COPY_LOOP for every length in [1, MAX_LEN) combined with every
 * source and destination offset in [0, MAX_OFFSET).
 *
 * do_one() aborts the process on the first failure, so reaching the end
 * means every combination passed. Exits with status 1 if the buffers cannot
 * be allocated. Returns 0 on success.
 */
int test_copy_loop(void)
{
	char *src, *dst, *redzone, *fill;
	unsigned long len, src_off, dst_off;
	unsigned long i;

	src = memalign(BUFLEN, BUFLEN);
	dst = memalign(BUFLEN, BUFLEN);
	redzone = malloc(BUFLEN);
	fill = malloc(BUFLEN);

	if (!src || !dst || !redzone || !fill) {
		fprintf(stderr, "malloc failed\n");
		exit(1);
	}

	/* Reference pattern the red zones are compared against */
	memset(redzone, POISON, BUFLEN);

	/* Fill with sequential bytes */
	for (i = 0; i < BUFLEN; i++)
		fill[i] = i & 0xff;

	for (len = 1; len < MAX_LEN; len++) {
		for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
			for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
				do_one(src, dst, src_off, dst_off, len,
				       redzone, fill);
			}
		}
	}

	/* Release the buffers instead of leaking them on return */
	free(fill);
	free(redzone);
	free(dst);
	free(src);

	return 0;
}
/* Run the copy-loop test under the harness, named after the COPY_LOOP symbol. */
int main(void)
{
	int rc = test_harness(test_copy_loop, str(COPY_LOOP));

	return rc;
}

View file

@ -0,0 +1,114 @@
/*
* Copyright 2013, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "subunit.h"
#include "utils.h"
#define TIMEOUT 120
#define KILL_TIMEOUT 5
/*
 * Run @test_function in a forked child placed in its own process group and
 * wait for it to finish.
 *
 * The child gets TIMEOUT seconds. When waitpid() is interrupted after that,
 * the whole group is sent SIGTERM and given KILL_TIMEOUT more seconds; if
 * waitpid() is interrupted again the group is sent SIGKILL.
 *
 * Returns the child's exit status if it exited normally, or 1 if the fork
 * failed, waitpid() failed, the child had to be force killed, or the child
 * did not exit normally.
 */
int run_test(int (test_function)(void), char *name)
{
	bool terminated;
	int rc, status;
	pid_t pid;

	/* Make sure output is flushed before forking */
	fflush(stdout);

	pid = fork();
	if (pid == 0) {
		setpgid(0, 0);
		exit(test_function());
	} else if (pid == -1) {
		perror("fork");
		return 1;
	}

	/* Also set the group from the parent so kill(-pid) works either way */
	setpgid(pid, pid);

	/* Wake us up in timeout seconds */
	alarm(TIMEOUT);
	terminated = false;

wait:
	rc = waitpid(pid, &status, 0);
	if (rc == -1) {
		if (errno != EINTR) {
			printf("unknown error from waitpid\n");
			/* Do not leave the timeout armed behind us */
			alarm(0);
			return 1;
		}

		if (terminated) {
			printf("!! force killing %s\n", name);
			kill(-pid, SIGKILL);
			return 1;
		} else {
			printf("!! killing %s\n", name);
			kill(-pid, SIGTERM);
			terminated = true;
			alarm(KILL_TIMEOUT);
			goto wait;
		}
	}

	/*
	 * The child has been reaped; cancel the pending timeout so a stale
	 * SIGALRM cannot interrupt whatever the caller does next.
	 */
	alarm(0);

	/* Kill anything else in the process group that is still running */
	kill(-pid, SIGTERM);

	if (WIFEXITED(status))
		status = WEXITSTATUS(status);
	else {
		if (WIFSIGNALED(status))
			printf("!! child died by signal %d\n", WTERMSIG(status));
		else
			printf("!! child died by unknown cause\n");

		status = 1; /* Signal or other */
	}

	return status;
}
/*
 * SIGALRM handler. It deliberately does nothing: having a handler installed
 * is enough for the signal to interrupt the waitpid() in run_test().
 */
static void alarm_handler(int signum)
{
/* Just wake us up from waitpid */
}
/*
 * Action installed for SIGALRM by test_harness(). Only the handler is set,
 * so sa_flags stays zero and SA_RESTART is not requested.
 */
static struct sigaction alarm_action = {
.sa_handler = alarm_handler,
};
/*
 * Report the start of test @name, install the SIGALRM action, run
 * @test_function through run_test() and report the outcome.
 *
 * Returns 1 if the signal action could not be installed, otherwise the
 * value returned by run_test(). MAGIC_SKIP_RETURN_VALUE is reported as a
 * skip, anything else as the finish code.
 */
int test_harness(int (test_function)(void), char *name)
{
	int result;

	test_start(name);
	test_set_git_version(GIT_VERSION);

	if (sigaction(SIGALRM, &alarm_action, NULL) != 0) {
		perror("sigaction");
		test_error(name);
		return 1;
	}

	result = run_test(test_function, name);
	if (result != MAGIC_SKIP_RETURN_VALUE) {
		test_finish(name, result);
		return result;
	}

	test_skip(name);
	return result;
}

View file

@ -0,0 +1,18 @@
# First (default) target: a bare "make" here delegates to the parent directory
noarg:
$(MAKE) -C ../
PROGS := hugetlb_vs_thp_test
all: $(PROGS)
# Every program is also built from the shared harness source
$(PROGS): ../harness.c
# The leading "-" makes make ignore a non-zero status from the loop
run_tests: all
@-for PROG in $(PROGS); do \
./$$PROG; \
done;
clean:
rm -f $(PROGS)
.PHONY: all run_tests clean

View file

@ -0,0 +1,72 @@
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include "utils.h"
/* This must match the huge page & THP size */
#define SIZE (16 * 1024 * 1024)
/*
 * One iteration of the test: try a MAP_HUGETLB mapping at a fixed hint
 * address, unmap it if it succeeded, then create an ordinary anonymous
 * mapping with the same hint and write one byte to it.
 *
 * Returns 0 on success, 1 if the second mapping cannot be created.
 */
static int test_body(void)
{
	void *addr;
	char *p;

	addr = (void *)0xa0000000;

	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
		 MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (p != MAP_FAILED) {
		/*
		 * Typically the mmap will fail because no huge pages are
		 * allocated on the system. But if there are huge pages
		 * allocated the mmap will succeed. That's fine too, we just
		 * munmap here before continuing.
		 *
		 * addr is only a hint (no MAP_FIXED), so unmap what mmap()
		 * actually returned rather than the hint.
		 */
		munmap(p, SIZE);
	}

	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED) {
		printf("Mapping failed @ %p\n", addr);
		perror("mmap");
		return 1;
	}

	/*
	 * Either a user or kernel access is sufficient to trigger the bug.
	 * A kernel access is easier to spot & debug, as it will trigger the
	 * softlockup or RCU stall detectors, and when the system is kicked
	 * into xmon we get a backtrace in the kernel.
	 *
	 * A good option is:
	 *  getcwd(p, SIZE);
	 *
	 * For the purposes of this testcase it's preferable to spin in
	 * userspace, so the harness can kill us if we get stuck. That way we
	 * see a test failure rather than a dead system.
	 */
	*p = 0xf;

	/* Same reasoning as above: release the mapping we were really given */
	munmap(p, SIZE);

	return 0;
}
/*
 * Repeat test_body() up to 10,000 times, stopping at the first failure.
 * Returns 1 if any iteration failed, 0 otherwise.
 */
static int test_main(void)
{
	/* 10,000 because it's a "bunch", and completes reasonably quickly */
	int remaining = 10000;

	while (remaining-- > 0) {
		if (test_body() != 0)
			return 1;
	}

	return 0;
}
/* Run test_main() under the shared harness as "hugetlb_vs_thp". */
int main(void)
{
	int rc = test_harness(test_main, "hugetlb_vs_thp");

	return rc;
}

View file

@ -0,0 +1,38 @@
# First (default) target: a bare "make" here delegates to the parent directory
noarg:
$(MAKE) -C ../
PROGS := count_instructions l3_bank_test per_event_excludes
EXTRA_SOURCES := ../harness.c event.c lib.c
# Sub-directories built, run and cleaned by recursing with $(MAKE)
SUB_TARGETS = ebb
all: $(PROGS) $(SUB_TARGETS)
$(PROGS): $(EXTRA_SOURCES)
# loop.S can only be built 64-bit
count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
$(CC) $(CFLAGS) -m64 -o $@ $^
# The leading "-" makes make ignore a non-zero status from the loop
run_tests: all sub_run_tests
@-for PROG in $(PROGS); do \
./$$PROG; \
done;
clean: sub_clean
rm -f $(PROGS) loop.o
# -k keeps going past targets that fail to build
$(SUB_TARGETS):
$(MAKE) -k -C $@ all
sub_run_tests: all
@for TARGET in $(SUB_TARGETS); do \
$(MAKE) -C $$TARGET run_tests; \
done;
sub_clean:
@for TARGET in $(SUB_TARGETS); do \
$(MAKE) -C $$TARGET clean; \
done;
.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)

View file

@ -0,0 +1,147 @@
/*
* Copyright 2013, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
#include "event.h"
#include "utils.h"
#include "lib.h"
extern void thirty_two_instruction_loop(u64 loops);
/*
 * Initialise @e as a PERF_TYPE_HARDWARE event for @config, labelled @name.
 * The event is created disabled and with the exclude_kernel, exclude_hv and
 * exclude_idle attribute bits set.
 */
static void setup_event(struct event *e, u64 config, char *name)
{
event_init_opts(e, config, PERF_TYPE_HARDWARE, name);
/* Created disabled; do_count_loop() enables counting via prctl() */
e->attr.disabled = 1;
e->attr.exclude_kernel = 1;
e->attr.exclude_hv = 1;
e->attr.exclude_idle = 1;
}
/*
 * Run the instruction loop with counting enabled and compare the counted
 * instructions with the expected number.
 *
 * @events:       two events; events[0] supplies the count that is checked,
 *                events[1] is only read, reported and reset
 * @instructions: requested instruction count; the loop helper is called
 *                with instructions / 32
 * @overhead:     extra instructions expected on top of @instructions
 * @report:       print both events and the comparison when true
 *
 * Returns 0 if the count is within tolerance of @instructions + @overhead,
 * -1 if it is not or if nothing was counted.
 */
static int do_count_loop(struct event *events, u64 instructions,
			 u64 overhead, bool report)
{
	s64 difference, expected;
	double percentage;

	prctl(PR_TASK_PERF_EVENTS_ENABLE);

	/* Run for the requested number of instructions */
	thirty_two_instruction_loop(instructions >> 5);

	prctl(PR_TASK_PERF_EVENTS_DISABLE);

	event_read(&events[0]);
	event_read(&events[1]);

	expected = instructions + overhead;
	difference = events[0].result.value - expected;
	percentage = (double)difference / events[0].result.value * 100;

	if (report) {
		event_report(&events[0]);
		event_report(&events[1]);

		printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
		/* expected is signed (s64), so print it with %lld */
		printf("Expected %lld\n", expected);
		printf("Actual %llu\n", events[0].result.value);
		printf("Delta %lld, %f%%\n", difference, percentage);
	}

	event_reset(&events[0]);
	event_reset(&events[1]);

	/* A zero count would make the integer division below divide by zero */
	if (events[0].result.value == 0)
		return -1;

	if (difference < 0)
		difference = -difference;

	/* Tolerate a difference below 0.0001 % */
	difference *= 10000 * 100;
	if (difference / events[0].result.value)
		return -1;

	return 0;
}
/* Count how many instructions it takes to do a null loop */
/*
 * Measure the instruction count of an empty run (zero requested
 * instructions) 101 times and return the smallest events[0] value seen.
 */
static u64 determine_overhead(struct event *events)
{
	u64 lowest, sample;
	int run;

	/* First measurement seeds the minimum */
	do_count_loop(events, 0, 0, false);
	lowest = events[0].result.value;

	for (run = 0; run < 100; run++) {
		do_count_loop(events, 0, 0, false);
		sample = events[0].result.value;

		if (sample >= lowest)
			continue;

		printf("Replacing overhead %llu with %llu\n", lowest, sample);
		lowest = sample;
	}

	return lowest;
}
/*
 * Open an "instructions" event and a "cycles" event grouped under it,
 * measure the overhead of an empty run, then check the instruction count
 * for increasingly long runs. Returns 0 on success, -1 if an event cannot
 * be opened; FAIL_IF() returns early when a run is out of tolerance.
 */
static int test_body(void)
{
struct event events[2];
u64 overhead;
setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, "instructions");
setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, "cycles");
if (event_open(&events[0])) {
perror("perf_event_open");
return -1;
}
/* The cycles event joins the group led by the instructions event's fd */
if (event_open_with_group(&events[1], events[0].fd)) {
perror("perf_event_open");
return -1;
}
overhead = determine_overhead(events);
printf("Overhead of null loop: %llu instructions\n", overhead);
/* Run for 1 million instructions */
FAIL_IF(do_count_loop(events, 1000000, overhead, true));
/* Run for 10 million instructions */
FAIL_IF(do_count_loop(events, 10000000, overhead, true));
/* Run for 100 million instructions */
FAIL_IF(do_count_loop(events, 100000000, overhead, true));
/* Run for 1 billion instructions */
FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
/* Run for 16 billion instructions */
FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
/* Run for 64 billion instructions */
FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
event_close(&events[0]);
event_close(&events[1]);
return 0;
}
/* Harness entry: run test_body() through eat_cpu() and pass its result on. */
static int count_instructions(void)
{
	int rc = eat_cpu(test_body);

	return rc;
}
/* Run the instruction-count test under the shared harness. */
int main(void)
{
	int rc = test_harness(count_instructions, "count_instructions");

	return rc;
}

View file

@ -0,0 +1,33 @@
# First (default) target: a bare "make" here delegates two directories up
noarg:
$(MAKE) -C ../../
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \
task_event_pinned_vs_ebb_test multi_ebb_procs_test \
multi_counter_test pmae_handling_test \
close_clears_pmcc_test instruction_count_test \
fork_cleanup_test ebb_on_child_test \
ebb_on_willing_child_test back_to_back_ebbs_test \
lost_exception_test no_handler_test \
cycles_with_mmcr2_test
all: $(PROGS)
# Sources shared by every program in this directory
$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
# Extra per-program prerequisites
instruction_count_test: ../loop.S
lost_exception_test: ../lib.c
# The leading "-" makes make ignore a non-zero status from the loop
run_tests: all
@-for PROG in $(PROGS); do \
./$$PROG; \
done;
clean:
rm -f $(PROGS)

View file

@ -0,0 +1,106 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "ebb.h"
#define NUMBER_OF_EBBS 50
/*
* Test that if we overflow the counter while in the EBB handler, we take
* another EBB on exiting from the handler.
*
* We do this by counting with a stupidly low sample period, causing us to
* overflow the PMU while we're still in the EBB handler, leading to another
* EBB.
*
* We get out of what would otherwise be an infinite loop by leaving the
* counter frozen once we've taken enough EBBs.
*/
/*
 * EBB callback for this test. Counts the EBB (or records it as spurious
 * when BESCR[PMEO] is clear), resets PMC1, re-arms EBBs and then does a few
 * SPR reads and trace writes.
 */
static void ebb_callee(void)
{
uint64_t siar, val;
val = mfspr(SPRN_BESCR);
if (!(val & BESCR_PMEO)) {
ebb_state.stats.spurious++;
goto out;
}
ebb_state.stats.ebb_count++;
trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
/* Resets the PMC */
count_pmc(1, sample_period);
out:
if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
/* Reset but leave counters frozen */
reset_ebb_with_clear_mask(MMCR0_PMAO);
else
/* Unfreezes */
reset_ebb();
/* Do some stuff to chew some cycles and pop the counter */
siar = mfspr(SPRN_SIAR);
trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
val = mfspr(SPRN_PMC1);
trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
val = mfspr(SPRN_MMCR0);
trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
}
/*
 * Test body: open a user-only "cycles" EBB event, install ebb_callee() and
 * spin until NUMBER_OF_EBBS EBBs have been counted. Passes (returns 0) only
 * if exactly NUMBER_OF_EBBS were taken; FAIL_IF() returns early otherwise.
 */
int back_to_back_ebbs(void)
{
struct event event;
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
event.attr.exclude_hv = 1;
event.attr.exclude_idle = 1;
FAIL_IF(event_open(&event));
setup_ebb_handler(ebb_callee);
FAIL_IF(ebb_event_enable(&event));
/* Very short period, so the counter overflows again inside the handler */
sample_period = 5;
/* Program PMC1 with the counters frozen, then enable EBBs and unfreeze */
ebb_freeze_pmcs();
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
ebb_global_enable();
ebb_unfreeze_pmcs();
while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
FAIL_IF(core_busy_loop());
ebb_global_disable();
ebb_freeze_pmcs();
count_pmc(1, sample_period);
dump_ebb_state();
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
return 0;
}
/* Run the back-to-back EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(back_to_back_ebbs, "back_to_back_ebbs");

	return rc;
}

View file

@ -0,0 +1,271 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <ppc-asm.h>
.text
/*
 * core_busy_loop: fill general purpose registers with known values, spin
 * for 100 iterations, then verify that neither the registers nor the copies
 * stored on the stack have changed.
 *
 * r3 is 0 on return when every check passed. A failed register check
 * returns 1.
 * NOTE(review): from the stack reload checks onward r3 holds the reloaded
 * value instead of 1, so a failing branch there returns whatever r3
 * contains at that point.
 */
FUNC_START(core_busy_loop)
/* Allocate a 168 byte frame and save r14-r31 in it */
stdu %r1, -168(%r1)
std r14, 160(%r1)
std r15, 152(%r1)
std r16, 144(%r1)
std r17, 136(%r1)
std r18, 128(%r1)
std r19, 120(%r1)
std r20, 112(%r1)
std r21, 104(%r1)
std r22, 96(%r1)
std r23, 88(%r1)
std r24, 80(%r1)
std r25, 72(%r1)
std r26, 64(%r1)
std r27, 56(%r1)
std r28, 48(%r1)
std r29, 40(%r1)
std r30, 32(%r1)
std r31, 24(%r1)
/*
 * Load a distinct pattern into each register and store a copy below the
 * new stack pointer (presumably inside the ABI red zone - confirm).
 * r30 and r31 get a pattern but no stack copy.
 */
li r3, 0x3030
std r3, -96(%r1)
li r4, 0x4040
std r4, -104(%r1)
li r5, 0x5050
std r5, -112(%r1)
li r6, 0x6060
std r6, -120(%r1)
li r7, 0x7070
std r7, -128(%r1)
li r8, 0x0808
std r8, -136(%r1)
li r9, 0x0909
std r9, -144(%r1)
li r10, 0x1010
std r10, -152(%r1)
li r11, 0x1111
std r11, -160(%r1)
li r14, 0x1414
std r14, -168(%r1)
li r15, 0x1515
std r15, -176(%r1)
li r16, 0x1616
std r16, -184(%r1)
li r17, 0x1717
std r17, -192(%r1)
li r18, 0x1818
std r18, -200(%r1)
li r19, 0x1919
std r19, -208(%r1)
li r20, 0x2020
std r20, -216(%r1)
li r21, 0x2121
std r21, -224(%r1)
li r22, 0x2222
std r22, -232(%r1)
li r23, 0x2323
std r23, -240(%r1)
li r24, 0x2424
std r24, -248(%r1)
li r25, 0x2525
std r25, -256(%r1)
li r26, 0x2626
std r26, -264(%r1)
li r27, 0x2727
std r27, -272(%r1)
li r28, 0x2828
std r28, -280(%r1)
li r29, 0x2929
std r29, -288(%r1)
li r30, 0x3030
li r31, 0x3131
/* Busy loop: r3 is reused as the counter and runs from 0 to 100 */
li r3, 0
0: addi r3, r3, 1
cmpwi r3, 100
blt 0b
/* Return 1 (fail) unless we get through all the checks */
li r3, 1
/* Check none of our registers have been corrupted */
cmpwi r4, 0x4040
bne 1f
cmpwi r5, 0x5050
bne 1f
cmpwi r6, 0x6060
bne 1f
cmpwi r7, 0x7070
bne 1f
cmpwi r8, 0x0808
bne 1f
cmpwi r9, 0x0909
bne 1f
cmpwi r10, 0x1010
bne 1f
cmpwi r11, 0x1111
bne 1f
cmpwi r14, 0x1414
bne 1f
cmpwi r15, 0x1515
bne 1f
cmpwi r16, 0x1616
bne 1f
cmpwi r17, 0x1717
bne 1f
cmpwi r18, 0x1818
bne 1f
cmpwi r19, 0x1919
bne 1f
cmpwi r20, 0x2020
bne 1f
cmpwi r21, 0x2121
bne 1f
cmpwi r22, 0x2222
bne 1f
cmpwi r23, 0x2323
bne 1f
cmpwi r24, 0x2424
bne 1f
cmpwi r25, 0x2525
bne 1f
cmpwi r26, 0x2626
bne 1f
cmpwi r27, 0x2727
bne 1f
cmpwi r28, 0x2828
bne 1f
cmpwi r29, 0x2929
bne 1f
cmpwi r30, 0x3030
bne 1f
cmpwi r31, 0x3131
bne 1f
/* Load junk into all our registers before we reload them from the stack. */
li r3, 0xde
li r4, 0xad
li r5, 0xbe
li r6, 0xef
li r7, 0xde
li r8, 0xad
li r9, 0xbe
li r10, 0xef
li r11, 0xde
li r14, 0xad
li r15, 0xbe
li r16, 0xef
li r17, 0xde
li r18, 0xad
li r19, 0xbe
li r20, 0xef
li r21, 0xde
li r22, 0xad
li r23, 0xbe
li r24, 0xef
li r25, 0xde
li r26, 0xad
li r27, 0xbe
li r28, 0xef
li r29, 0xdd
/* Reload each stored copy and check it still holds its pattern */
ld r3, -96(%r1)
cmpwi r3, 0x3030
bne 1f
ld r4, -104(%r1)
cmpwi r4, 0x4040
bne 1f
ld r5, -112(%r1)
cmpwi r5, 0x5050
bne 1f
ld r6, -120(%r1)
cmpwi r6, 0x6060
bne 1f
ld r7, -128(%r1)
cmpwi r7, 0x7070
bne 1f
ld r8, -136(%r1)
cmpwi r8, 0x0808
bne 1f
ld r9, -144(%r1)
cmpwi r9, 0x0909
bne 1f
ld r10, -152(%r1)
cmpwi r10, 0x1010
bne 1f
ld r11, -160(%r1)
cmpwi r11, 0x1111
bne 1f
ld r14, -168(%r1)
cmpwi r14, 0x1414
bne 1f
ld r15, -176(%r1)
cmpwi r15, 0x1515
bne 1f
ld r16, -184(%r1)
cmpwi r16, 0x1616
bne 1f
ld r17, -192(%r1)
cmpwi r17, 0x1717
bne 1f
ld r18, -200(%r1)
cmpwi r18, 0x1818
bne 1f
ld r19, -208(%r1)
cmpwi r19, 0x1919
bne 1f
ld r20, -216(%r1)
cmpwi r20, 0x2020
bne 1f
ld r21, -224(%r1)
cmpwi r21, 0x2121
bne 1f
ld r22, -232(%r1)
cmpwi r22, 0x2222
bne 1f
ld r23, -240(%r1)
cmpwi r23, 0x2323
bne 1f
ld r24, -248(%r1)
cmpwi r24, 0x2424
bne 1f
ld r25, -256(%r1)
cmpwi r25, 0x2525
bne 1f
ld r26, -264(%r1)
cmpwi r26, 0x2626
bne 1f
ld r27, -272(%r1)
cmpwi r27, 0x2727
bne 1f
ld r28, -280(%r1)
cmpwi r28, 0x2828
bne 1f
ld r29, -288(%r1)
cmpwi r29, 0x2929
bne 1f
/* Load 0 (success) to return */
li r3, 0
/* Common exit: restore r14-r31 from the frame and pop it */
1: ld r14, 160(%r1)
ld r15, 152(%r1)
ld r16, 144(%r1)
ld r17, 136(%r1)
ld r18, 128(%r1)
ld r19, 120(%r1)
ld r20, 112(%r1)
ld r21, 104(%r1)
ld r22, 96(%r1)
ld r23, 88(%r1)
ld r24, 80(%r1)
ld r25, 72(%r1)
ld r26, 64(%r1)
ld r27, 56(%r1)
ld r28, 48(%r1)
ld r29, 40(%r1)
ld r30, 32(%r1)
ld r31, 24(%r1)
addi %r1, %r1, 168
blr

View file

@ -0,0 +1,59 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include <signal.h>
#include "ebb.h"
/*
* Test that closing the EBB event clears MMCR0_PMCC, preventing further access
* by userspace to the PMU hardware.
*/
/*
 * Test body: take at least one EBB on a "cycles" event, close the event,
 * then check that writing PMC1 is caught by catch_sigill() while the EBB
 * registers can still be read. Returns 0 on success; FAIL_IF() returns
 * early on any failed step.
 */
int close_clears_pmcc(void)
{
struct event event;
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
FAIL_IF(event_open(&event));
ebb_enable_pmc_counting(1);
setup_ebb_handler(standard_ebb_callee);
ebb_global_enable();
FAIL_IF(ebb_event_enable(&event));
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
/* Spin until the first EBB has been taken */
while (ebb_state.stats.ebb_count < 1)
FAIL_IF(core_busy_loop());
ebb_global_disable();
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count == 0);
/* The real test is here, do we take a SIGILL when writing PMU regs now
* that we have closed the event. We expect that we will. */
FAIL_IF(catch_sigill(write_pmc1));
/* We should still be able to read EBB regs though */
mfspr(SPRN_EBBHR);
mfspr(SPRN_EBBRR);
mfspr(SPRN_BESCR);
return 0;
}
/* Run the close-clears-PMCC test under the shared harness. */
int main(void)
{
	int rc = test_harness(close_clears_pmcc, "close_clears_pmcc");

	return rc;
}

View file

@ -0,0 +1,93 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "ebb.h"
/*
* Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
* should remain and the EBB event should fail to enable.
*/
/*
 * Initialise, open on @cpu and enable a pinned PM_RUN_INST_CMPL event with
 * the exclude_kernel, exclude_hv and exclude_idle bits set. Returns 0 on
 * success; SKIP_IF()/FAIL_IF() return early otherwise.
 */
static int setup_cpu_event(struct event *event, int cpu)
{
event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
/* Pinned is what distinguishes this test from cpu_event_vs_ebb */
event->attr.pinned = 1;
event->attr.exclude_kernel = 1;
event->attr.exclude_hv = 1;
event->attr.exclude_idle = 1;
SKIP_IF(require_paranoia_below(1));
FAIL_IF(event_open_with_cpu(event, cpu));
FAIL_IF(event_enable(event));
return 0;
}
/*
 * Test body: bind to one online CPU, fork a child that runs ebb_child(),
 * set up a pinned cpu event in the parent, then let the child try to
 * install and run its EBB event.
 *
 * The child is expected to exit with status 2, and the cpu event must have
 * counted and been running for all the time it was enabled. Returns 0 on
 * success; FAIL_IF() returns early on any failed step, and a failure in
 * setup_cpu_event() is returned as-is after the child has been killed.
 */
int cpu_event_pinned_vs_ebb(void)
{
	union pipe read_pipe, write_pipe;
	struct event event;
	int cpu, rc;
	pid_t pid;

	cpu = pick_online_cpu();
	FAIL_IF(cpu < 0);
	FAIL_IF(bind_to_cpu(cpu));

	FAIL_IF(pipe(read_pipe.fds) == -1);
	FAIL_IF(pipe(write_pipe.fds) == -1);

	pid = fork();
	/*
	 * If fork() failed there is no child to sync with, and -1 must never
	 * reach the kill/wait helpers below.
	 */
	FAIL_IF(pid == -1);
	if (pid == 0) {
		/* NB order of pipes looks reversed */
		exit(ebb_child(write_pipe, read_pipe));
	}

	/* We setup the cpu event first */
	rc = setup_cpu_event(&event, cpu);
	if (rc) {
		kill_child_and_wait(pid);
		return rc;
	}

	/* Signal the child to install its EBB event and wait */
	if (sync_with_child(read_pipe, write_pipe))
		/* If it fails, wait for it to exit */
		goto wait;

	/* Signal the child to run */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

wait:
	/* We expect it to fail to read the event */
	FAIL_IF(wait_for_child(pid) != 2);

	FAIL_IF(event_disable(&event));
	FAIL_IF(event_read(&event));

	event_report(&event);

	/* The cpu event should have run */
	FAIL_IF(event.result.value == 0);
	FAIL_IF(event.result.enabled != event.result.running);

	return 0;
}
/* Run the pinned-cpu-event vs EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");

	return rc;
}

View file

@ -0,0 +1,89 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "ebb.h"
/*
* Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
* event off the PMU.
*/
/*
 * Initialise, open on @cpu and enable a PM_RUN_INST_CMPL event with the
 * exclude_kernel, exclude_hv and exclude_idle bits set. Returns 0 on
 * success; SKIP_IF()/FAIL_IF() return early otherwise.
 */
static int setup_cpu_event(struct event *event, int cpu)
{
event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
event->attr.exclude_kernel = 1;
event->attr.exclude_hv = 1;
event->attr.exclude_idle = 1;
SKIP_IF(require_paranoia_below(1));
FAIL_IF(event_open_with_cpu(event, cpu));
FAIL_IF(event_enable(event));
return 0;
}
/*
 * Test body: bind to one online CPU, fork a child that runs ebb_child(),
 * set up a (non-pinned) cpu event in the parent, then let the child install
 * and run its EBB event.
 *
 * The child is expected to succeed; the cpu event is read and reported but
 * its value is not checked. Returns 0 on success; FAIL_IF() returns early
 * on any failed step, and a failure in setup_cpu_event() is returned as-is
 * after the child has been killed.
 */
int cpu_event_vs_ebb(void)
{
	union pipe read_pipe, write_pipe;
	struct event event;
	int cpu, rc;
	pid_t pid;

	cpu = pick_online_cpu();
	FAIL_IF(cpu < 0);
	FAIL_IF(bind_to_cpu(cpu));

	FAIL_IF(pipe(read_pipe.fds) == -1);
	FAIL_IF(pipe(write_pipe.fds) == -1);

	pid = fork();
	/*
	 * If fork() failed there is no child to sync with, and -1 must never
	 * reach the kill/wait helpers below.
	 */
	FAIL_IF(pid == -1);
	if (pid == 0) {
		/* NB order of pipes looks reversed */
		exit(ebb_child(write_pipe, read_pipe));
	}

	/* We setup the cpu event first */
	rc = setup_cpu_event(&event, cpu);
	if (rc) {
		kill_child_and_wait(pid);
		return rc;
	}

	/* Signal the child to install its EBB event and wait */
	if (sync_with_child(read_pipe, write_pipe))
		/* If it fails, wait for it to exit */
		goto wait;

	/* Signal the child to run */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

wait:
	/* We expect the child to succeed */
	FAIL_IF(wait_for_child(pid));

	FAIL_IF(event_disable(&event));
	FAIL_IF(event_read(&event));

	event_report(&event);

	/* The cpu event may have run */

	return 0;
}
/* Run the cpu-event vs EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");

	return rc;
}

View file

@ -0,0 +1,58 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdio.h>
#include <stdlib.h>
#include "ebb.h"
/*
* Basic test that counts user cycles and takes EBBs.
*/
/*
 * Test body: count user cycles on PMC1 with EBBs enabled until 10 EBBs have
 * been taken, checking MMCR0 after every busy loop. Passes (returns 0) if
 * at least one EBB was taken and ebb_check_count() accepts the PMC1 total
 * with a fudge of 100.
 */
int cycles(void)
{
struct event event;
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
event.attr.exclude_hv = 1;
event.attr.exclude_idle = 1;
FAIL_IF(event_open(&event));
ebb_enable_pmc_counting(1);
setup_ebb_handler(standard_ebb_callee);
ebb_global_enable();
FAIL_IF(ebb_event_enable(&event));
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
while (ebb_state.stats.ebb_count < 10) {
FAIL_IF(core_busy_loop());
FAIL_IF(ebb_check_mmcr0());
}
ebb_global_disable();
ebb_freeze_pmcs();
/* Fold whatever PMC1 counted since the last EBB into the statistics */
count_pmc(1, sample_period);
dump_ebb_state();
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count == 0);
FAIL_IF(!ebb_check_count(1, sample_period, 100));
return 0;
}
/* Run the basic cycles EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(cycles, "cycles");

	return rc;
}

View file

@ -0,0 +1,117 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "ebb.h"
/*
* Test of counting cycles while using MMCR0_FC (freeze counters) to only count
* parts of the code. This is complicated by the fact that FC is set by the
* hardware when the event overflows. We may take the EBB after we have set FC,
* so we have to be careful about whether we clear FC at the end of the EBB
* handler or not.
*/
static bool counters_frozen = false;
static int ebbs_while_frozen = 0;
/*
 * EBB callback for this test. Counts the EBB (or records it as spurious
 * when BESCR[PMEO] is clear) and re-arms EBBs. MMCR0[FC] is cleared on the
 * way out unless the main loop has marked the counters as frozen.
 */
static void ebb_callee(void)
{
uint64_t mask, val;
/* Bits to clear in MMCR0 when re-arming; FC is dropped from it below if frozen */
mask = MMCR0_PMAO | MMCR0_FC;
val = mfspr(SPRN_BESCR);
if (!(val & BESCR_PMEO)) {
ebb_state.stats.spurious++;
goto out;
}
ebb_state.stats.ebb_count++;
trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
val = mfspr(SPRN_MMCR0);
trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
if (counters_frozen) {
trace_log_string(ebb_state.trace, "frozen");
ebbs_while_frozen++;
/* Leave FC alone so the main loop's freeze survives the handler */
mask &= ~MMCR0_FC;
}
count_pmc(1, sample_period);
out:
reset_ebb_with_clear_mask(mask);
}
/*
 * Test body: count cycles with EBBs while the main loop repeatedly clears
 * MMCR0[FC] around core_busy_loop() and sets it again afterwards. Fails if
 * no EBB was taken or if FC was ever found clear right after being set.
 */
int cycles_with_freeze(void)
{
struct event event;
uint64_t val;
bool fc_cleared;
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
event.attr.exclude_hv = 1;
event.attr.exclude_idle = 1;
FAIL_IF(event_open(&event));
setup_ebb_handler(ebb_callee);
ebb_global_enable();
FAIL_IF(ebb_event_enable(&event));
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
fc_cleared = false;
/* Make sure we loop until we take at least one EBB */
while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
ebb_state.stats.ebb_count < 1)
{
/* Publish the flag before the MMCR0 write so the handler sees a matching state */
counters_frozen = false;
mb();
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
FAIL_IF(core_busy_loop());
counters_frozen = true;
mb();
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
/* FC was just set; finding it clear means something cleared it behind our back */
val = mfspr(SPRN_MMCR0);
if (! (val & MMCR0_FC)) {
printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
fc_cleared = true;
}
}
ebb_global_disable();
ebb_freeze_pmcs();
count_pmc(1, sample_period);
dump_ebb_state();
printf("EBBs while frozen %d\n", ebbs_while_frozen);
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count == 0);
FAIL_IF(fc_cleared);
return 0;
}
/* Run the cycles-with-freeze EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(cycles_with_freeze, "cycles_with_freeze");

	return rc;
}

View file

@ -0,0 +1,91 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "ebb.h"
/*
* Test of counting cycles while manipulating the user accessible bits in MMCR2.
*/
/* We use two values because the first freezes PMC1 and so we would get no EBBs */
#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
#define MMCR2_EXPECTED_2 0x0020100804020000UL /* ( FC2P|FC3P|FC4P|FC5P|FC6P) */
/*
 * Test body: count cycles with EBBs while alternately writing the two
 * MMCR2_EXPECTED values to MMCR2 and reading them back after each busy
 * loop. Fails if no EBB was taken or if a read-back ever differed from the
 * value written.
 */
int cycles_with_mmcr2(void)
{
struct event event;
uint64_t val, expected[2], actual;
int i;
bool bad_mmcr2;
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
event.attr.exclude_hv = 1;
event.attr.exclude_idle = 1;
FAIL_IF(event_open(&event));
ebb_enable_pmc_counting(1);
setup_ebb_handler(standard_ebb_callee);
ebb_global_enable();
FAIL_IF(ebb_event_enable(&event));
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
/* XXX Set of MMCR2 must be after enable */
expected[0] = MMCR2_EXPECTED_1;
expected[1] = MMCR2_EXPECTED_2;
i = 0;
bad_mmcr2 = false;
/* Make sure we loop until we take at least one EBB */
while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
ebb_state.stats.ebb_count < 1)
{
/* Alternate between the two values on successive iterations */
mtspr(SPRN_MMCR2, expected[i % 2]);
FAIL_IF(core_busy_loop());
val = mfspr(SPRN_MMCR2);
if (val != expected[i % 2]) {
bad_mmcr2 = true;
/* actual is only assigned here and only printed when bad_mmcr2 is set */
actual = val;
}
i++;
}
ebb_global_disable();
ebb_freeze_pmcs();
count_pmc(1, sample_period);
dump_ebb_state();
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count == 0);
if (bad_mmcr2)
printf("Bad MMCR2 value seen is 0x%lx\n", actual);
FAIL_IF(bad_mmcr2);
return 0;
}
/* Run the cycles-with-MMCR2 EBB test under the shared harness. */
int main(void)
{
	int rc = test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");

	return rc;
}

View file

@ -0,0 +1,478 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#define _GNU_SOURCE /* For CPU_ZERO etc. */
#include <sched.h>
#include <sys/wait.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "trace.h"
#include "reg.h"
#include "ebb.h"
/* Callback installed by setup_ebb_handler(); NULL means no callback is set. */
void (*ebb_user_func)(void);

/* Invoke the installed callback, if there is one. */
void ebb_hook(void)
{
	if (!ebb_user_func)
		return;

	ebb_user_func();
}
struct ebb_state ebb_state;
u64 sample_period = 0x40000000ull;
/*
 * Re-arm EBBs from within a handler: clear the @mmcr0_clear_mask bits in
 * MMCR0 and set MMCR0[PMAE], then write BESCR_PMEO to SPRN_BESCRR and
 * BESCR_PME to SPRN_BESCRS. The step numbers below continue a sequence
 * whose earlier and final steps are done by the callers.
 */
void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
{
u64 val;
/* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */
/* 3) set MMCR0[PMAE] - docs say BESCR[PME] should do this */
val = mfspr(SPRN_MMCR0);
mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE);
/* 4) clear BESCR[PMEO] */
mtspr(SPRN_BESCRR, BESCR_PMEO);
/* 5) set BESCR[PME] */
mtspr(SPRN_BESCRS, BESCR_PME);
/* 6) rfebb 1 - done in our caller */
}
/* Re-arm EBBs, clearing both MMCR0[PMAO] and MMCR0[FC] in the process. */
void reset_ebb(void)
{
reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
}
/* Called outside of the EBB handler to check MMCR0 is sane */
/*
 * Called outside of the EBB handler to check MMCR0 is sane.
 *
 * Returns 1 (and prints MMCR0) when, of the FC and PMAO bits, exactly FC is
 * set; returns 0 for every other combination.
 */
int ebb_check_mmcr0(void)
{
	const u64 watched = MMCR0_FC | MMCR0_PMAO;
	u64 mmcr0 = mfspr(SPRN_MMCR0);

	/* It's OK if we see FC & PMAO, but not FC by itself */
	if ((mmcr0 & watched) != MMCR0_FC)
		return 0;

	printf("Outside of loop, only FC set 0x%llx\n", mmcr0);
	return 1;
}
/*
 * Check that the accumulated count for @pmc lies within
 * ebb_count * (@sample_period - @fudge) and
 * ebb_count * (@sample_period + @fudge), inclusive.
 *
 * Prints the outcome either way. Returns true when the count is in range.
 */
bool ebb_check_count(int pmc, u64 sample_period, int fudge)
{
	u64 total, floor, ceiling;

	total = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
	floor = ebb_state.stats.ebb_count * (sample_period - fudge);
	ceiling = ebb_state.stats.ebb_count * (sample_period + fudge);

	if (total < floor) {
		printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
		       pmc, total, floor, floor - total);
		return false;
	}

	if (total > ceiling) {
		printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
		       pmc, total, ceiling, total - ceiling);
		return false;
	}

	printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
	       pmc, total, floor, ceiling, total - floor, ceiling - total);

	return true;
}
/*
 * Default EBB callback. Counts the EBB (or records it as spurious when
 * BESCR[PMEO] is clear), folds every enabled PMC into the statistics,
 * notes when none of them reported an overflow, and re-arms EBBs.
 */
void standard_ebb_callee(void)
{
int found, i;
u64 val;
val = mfspr(SPRN_BESCR);
if (!(val & BESCR_PMEO)) {
ebb_state.stats.spurious++;
goto out;
}
ebb_state.stats.ebb_count++;
trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
val = mfspr(SPRN_MMCR0);
trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
/* count_pmc() returns non-zero for a PMC that overflowed */
found = 0;
for (i = 1; i <= 6; i++) {
if (ebb_state.pmc_enable[PMC_INDEX(i)])
found += count_pmc(i, sample_period);
}
if (!found)
ebb_state.stats.no_overflow++;
out:
reset_ebb();
}
extern void ebb_handler(void);
/*
 * Install @callee as the function called from ebb_hook() and point
 * SPRN_EBBHR at the entry address of the ebb_handler routine.
 */
void setup_ebb_handler(void (*callee)(void))
{
u64 entry;
#if defined(_CALL_ELF) && _CALL_ELF == 2
/* With _CALL_ELF == 2 the function symbol is used directly as the entry address */
entry = (u64)ebb_handler;
#else
/* Otherwise the symbol is treated as a descriptor whose first field is the entry address */
struct opd
{
u64 entry;
u64 toc;
} *opd;
opd = (struct opd *)ebb_handler;
entry = opd->entry;
#endif
printf("EBB Handler is at %#llx\n", entry);
ebb_user_func = callee;
/* Ensure ebb_user_func is set before we set the handler */
mb();
mtspr(SPRN_EBBHR, entry);
/* Make sure the handler is set before we return */
mb();
}
/* Zero every counter in ebb_state.stats; the rest of ebb_state is untouched. */
void clear_ebb_stats(void)
{
memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
}
/*
 * Print the software statistics kept in ebb_state.stats. The labels
 * pmc[1]..pmc[6] correspond to pmc_count[0]..pmc_count[5].
 */
void dump_summary_ebb_state(void)
{
printf("ebb_state:\n" \
" ebb_count = %d\n" \
" spurious = %d\n" \
" negative = %d\n" \
" no_overflow = %d\n" \
" pmc[1] count = 0x%llx\n" \
" pmc[2] count = 0x%llx\n" \
" pmc[3] count = 0x%llx\n" \
" pmc[4] count = 0x%llx\n" \
" pmc[5] count = 0x%llx\n" \
" pmc[6] count = 0x%llx\n",
ebb_state.stats.ebb_count, ebb_state.stats.spurious,
ebb_state.stats.negative, ebb_state.stats.no_overflow,
ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1],
ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3],
ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]);
}
/*
 * Render the MMCR0 bits of interest in @value as a space-separated list of
 * names: bit 31 as "FC", bit 26 as "PMAE", bit 7 as "PMAO".
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call. The longest possible result ("FC PMAE PMAO ") is 13 characters, so
 * it fits in the 16-byte buffer.
 */
static char *decode_mmcr0(u32 value)
{
	static char buf[16];

	buf[0] = '\0';

	/* Unsigned constants: shifting a signed 1 into bit 31 is undefined */
	if (value & (1u << 31))
		strcat(buf, "FC ");
	if (value & (1u << 26))
		strcat(buf, "PMAE ");
	if (value & (1u << 7))
		strcat(buf, "PMAO ");

	return buf;
}
/*
 * Render selected BESCR bits as a space-separated string: bit 63 as "GE ",
 * bit 32 as "PMAE " and bit 0 as "PMAO ".
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 */
static char *decode_bescr(u64 value)
{
	static const struct {
		u64 mask;
		const char *label;
	} fields[] = {
		{ 1ull << 63, "GE " },
		{ 1ull << 32, "PMAE " },
		{ 1ull, "PMAO " },
	};
	static char buf[16];
	unsigned int i;

	buf[0] = '\0';
	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
		if (value & fields[i].mask)
			strcat(buf, fields[i].label);
	}

	return buf;
}
/*
 * Print the current values of the PMU/EBB registers: MMCR0 and BESCR (each
 * with a decoded flag string), MMCR2, EBBHR, PMC1..PMC6 and SIAR.
 */
void dump_ebb_hw_state(void)
{
	u32 mmcr0 = mfspr(SPRN_MMCR0);
	u64 bescr = mfspr(SPRN_BESCR);
	/* The two decoders use separate static buffers, so both stay valid */
	char *mmcr0_flags = decode_mmcr0(mmcr0);
	char *bescr_flags = decode_bescr(bescr);

	printf("HW state:\n"
	       "MMCR0 0x%016x %s\n"
	       "MMCR2 0x%016lx\n"
	       "EBBHR 0x%016lx\n"
	       "BESCR 0x%016llx %s\n"
	       "PMC1 0x%016lx\n"
	       "PMC2 0x%016lx\n"
	       "PMC3 0x%016lx\n"
	       "PMC4 0x%016lx\n"
	       "PMC5 0x%016lx\n"
	       "PMC6 0x%016lx\n"
	       "SIAR 0x%016lx\n",
	       mmcr0, mmcr0_flags,
	       mfspr(SPRN_MMCR2),
	       mfspr(SPRN_EBBHR),
	       bescr, bescr_flags,
	       mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
	       mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
	       mfspr(SPRN_SIAR));
}
/*
 * Full diagnostic dump: software statistics first, then the hardware
 * registers, then the contents of the trace buffer.
 */
void dump_ebb_state(void)
{
	dump_summary_ebb_state();
	dump_ebb_hw_state();

	trace_buffer_print(ebb_state.trace);
}
/*
 * Harvest one PMC and reload it.
 *
 * pmc:           counter number, 1..6.
 * sample_period: period the counter was primed with; the start value is
 *                pmc_sample_period(sample_period).
 *
 * If the value read is below the start value, stats.negative is bumped;
 * otherwise the distance from the start value is added to
 * stats.pmc_count[].  The raw value is traced and the counter is rewritten
 * with the start value.
 *
 * Returns non-zero if the value read was at or above COUNTER_OVERFLOW.
 */
int count_pmc(int pmc, uint32_t sample_period)
{
	const uint32_t reload = pmc_sample_period(sample_period);
	const u64 observed = read_pmc(pmc);

	if (observed >= reload)
		ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += observed - reload;
	else
		ebb_state.stats.negative++;

	trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, observed);

	/* Re-prime the counter for the next period */
	write_pmc(pmc, reload);

	return observed >= COUNTER_OVERFLOW;
}
/*
 * Enable a perf event and read it once.
 *
 * A barrier is issued before the enable ioctl so earlier SPR writes are
 * ordered against it.  If the ioctl fails its result is returned straight
 * away; otherwise the event is read, a second barrier is issued and the
 * result of the read is returned.
 */
int ebb_event_enable(struct event *e)
{
	int rc;

	mb();

	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
	if (rc == 0) {
		rc = event_read(e);
		mb();
	}

	return rc;
}
void ebb_freeze_pmcs(void)
{
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
mb();
}
void ebb_unfreeze_pmcs(void)
{
/* Unfreeze counters */
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
mb();
}
void ebb_global_enable(void)
{
/* Enable EBBs globally and PMU EBBs */
mtspr(SPRN_BESCR, 0x8000000100000000ull);
mb();
}
void ebb_global_disable(void)
{
/* Disable EBBs & freeze counters, events are still scheduled */
mtspr(SPRN_BESCRR, BESCR_PME);
mb();
}
void event_ebb_init(struct event *e)
{
e->attr.config |= (1ull << 63);
}
void event_bhrb_init(struct event *e, unsigned ifm)
{
e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
}
/*
 * Prepare an event to be an EBB group leader: the EBB flag is set via
 * event_ebb_init() and the event is made both exclusive and pinned.
 */
void event_leader_ebb_init(struct event *e)
{
	event_ebb_init(e);

	e->attr.pinned = 1;
	e->attr.exclusive = 1;
}
/*
 * Child-side body shared by tests that need a child running an EBB event.
 *
 * Waits for the parent, opens and enables a "cycles" EBB leader event on
 * PMC1, synchronises with the parent over the two pipes, then spins until
 * 20 EBBs have been taken before tearing everything down and dumping state.
 *
 * read_pipe:  pipe the child waits on for the parent.
 * write_pipe: pipe the child uses to notify the parent.
 *
 * Returns 0 on success and 2 if the first read of the event fails (the
 * parent is told about that error).  Other failures go through FAIL_IF().
 */
int ebb_child(union pipe read_pipe, union pipe write_pipe)
{
struct event event;
uint64_t val;
FAIL_IF(wait_for_parent(read_pipe));
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
event.attr.exclude_hv = 1;
event.attr.exclude_idle = 1;
FAIL_IF(event_open(&event));
ebb_enable_pmc_counting(1);
setup_ebb_handler(standard_ebb_callee);
ebb_global_enable();
FAIL_IF(event_enable(&event));
if (event_read(&event)) {
/*
 * Some tests expect to fail here, so don't report an error on
 * this line, and return a distinguishable error code. Tell the
 * parent an error happened.
 */
notify_parent_of_error(write_pipe);
return 2;
}
/* Prime PMC1 with the start value for the configured sample period */
mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
/* Handshake with the parent: notify, wait, notify */
FAIL_IF(notify_parent(write_pipe));
FAIL_IF(wait_for_parent(read_pipe));
FAIL_IF(notify_parent(write_pipe));
while (ebb_state.stats.ebb_count < 20) {
FAIL_IF(core_busy_loop());
/* To try and hit SIGILL case */
/* val is never consumed; it only exists so the three SPR reads happen */
val = mfspr(SPRN_MMCRA);
val |= mfspr(SPRN_MMCR2);
val |= mfspr(SPRN_MMCR0);
}
ebb_global_disable();
ebb_freeze_pmcs();
/* Pick up whatever PMC1 counted since the last EBB */
count_pmc(1, sample_period);
dump_ebb_state();
event_close(&event);
FAIL_IF(ebb_state.stats.ebb_count == 0);
return 0;
}
/* Jump target used by sigill_handler() to return into catch_sigill(). */
static jmp_buf setjmp_env;
/*
 * SIGILL handler: prints a message and longjmps to setjmp_env with value 1,
 * so it never returns to the faulting instruction.
 *
 * NOTE(review): printf() is not on the POSIX async-signal-safe list, and a
 * plain setjmp/longjmp pair is not guaranteed to restore the signal mask, so
 * SIGILL may stay blocked after the jump - confirm this is acceptable for
 * callers that use catch_sigill() more than once per process.
 */
static void sigill_handler(int signal)
{
printf("Took sigill\n");
longjmp(setjmp_env, 1);
}
/* sigaction descriptor that installs sigill_handler; all other fields are zero. */
static struct sigaction sigill_action = {
.sa_handler = sigill_handler,
};
/*
 * Run func() and report whether it raised SIGILL.
 *
 * Installs sigill_action for SIGILL, then calls func().
 *
 * Returns 0 if func() took a SIGILL (the handler jumped back here), and 1
 * if func() returned normally or the handler could not be installed.
 */
int catch_sigill(void (*func)(void))
{
	if (sigaction(SIGILL, &sigill_action, NULL) != 0) {
		perror("sigaction");
		return 1;
	}

	/* Non-zero means we arrived here via longjmp from sigill_handler() */
	if (setjmp(setjmp_env) != 0)
		return 0;

	func();

	return 1;
}
/*
 * Write zero to PMC1.  Takes no arguments so it can be handed to
 * catch_sigill() as a probe.
 */
void write_pmc1(void)
{
	write_pmc(1, 0);
}
/*
 * Write value to PMC number pmc (1..6).  Any other pmc is ignored.
 *
 * A switch is used because each mtspr() names its SPR explicitly.
 */
void write_pmc(int pmc, u64 value)
{
	switch (pmc) {
	case 1:
		mtspr(SPRN_PMC1, value);
		break;
	case 2:
		mtspr(SPRN_PMC2, value);
		break;
	case 3:
		mtspr(SPRN_PMC3, value);
		break;
	case 4:
		mtspr(SPRN_PMC4, value);
		break;
	case 5:
		mtspr(SPRN_PMC5, value);
		break;
	case 6:
		mtspr(SPRN_PMC6, value);
		break;
	default:
		/* Not a valid PMC number: nothing to write */
		break;
	}
}
/*
 * Read PMC number pmc (1..6) and return its value.
 * Returns 0 for any other pmc.
 */
u64 read_pmc(int pmc)
{
	u64 value = 0;

	switch (pmc) {
	case 1:
		value = mfspr(SPRN_PMC1);
		break;
	case 2:
		value = mfspr(SPRN_PMC2);
		break;
	case 3:
		value = mfspr(SPRN_PMC3);
		break;
	case 4:
		value = mfspr(SPRN_PMC4);
		break;
	case 5:
		value = mfspr(SPRN_PMC5);
		break;
	case 6:
		value = mfspr(SPRN_PMC6);
		break;
	default:
		break;
	}

	return value;
}
/*
 * SIGTERM handler: dump the software statistics and the hardware register
 * state, then abort().
 */
static void term_handler(int signal)
{
	(void)signal;

	dump_summary_ebb_state();
	dump_ebb_hw_state();

	abort();
}

/* sigaction descriptor installed for SIGTERM by ebb_init(). */
struct sigaction term_action = {
	.sa_handler = term_handler,
};
/*
 * Constructor: clears the EBB statistics, installs the SIGTERM handler
 * (reporting, but not aborting, if that fails) and allocates a 1 MiB trace
 * buffer.
 */
static void __attribute__((constructor)) ebb_init(void)
{
	const int trace_bytes = 1 * 1024 * 1024;

	clear_ebb_stats();

	if (sigaction(SIGTERM, &term_action, NULL) != 0)
		perror("sigaction");

	ebb_state.trace = trace_buffer_allocate(trace_bytes);
}

View file

@ -0,0 +1,77 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
#include "../event.h"
#include "../lib.h"
#include "trace.h"
#include "reg.h"
/* Convert a 1-based PMC number into a 0-based array index. */
#define PMC_INDEX(pmc) ((pmc)-1)
#define NUM_PMC_VALUES 128
/*
 * State shared between the EBB handler and the test code.
 */
struct ebb_state
{
struct {
/* Accumulated count per PMC, indexed with PMC_INDEX() */
u64 pmc_count[6];
/* Number of EBBs taken; volatile because test loops poll it */
volatile int ebb_count;
/* Handler invocations that found BESCR_PMEO clear */
int spurious;
/* Times a PMC was read below its start value */
int negative;
/* EBBs for which no enabled PMC reported an overflow */
int no_overflow;
} stats;
/* Which PMCs the standard callee should harvest, indexed with PMC_INDEX() */
bool pmc_enable[6];
/* Trace buffer that the handler logs into */
struct trace_buffer *trace;
};
extern struct ebb_state ebb_state;
/* Value at which a PMC is considered to have overflowed. */
#define COUNTER_OVERFLOW 0x80000000ull

/*
 * Start value to load into a PMC so that it reaches COUNTER_OVERFLOW after
 * counting `value` events.  The subtraction is done in 64 bits and the
 * result is truncated to 32 bits.
 */
static inline uint32_t pmc_sample_period(uint32_t value)
{
	const unsigned long long start = COUNTER_OVERFLOW - value;

	return (uint32_t)start;
}
/* Flag PMC number pmc (1-based) as enabled in ebb_state.pmc_enable[]. */
static inline void ebb_enable_pmc_counting(int pmc)
{
	const int slot = PMC_INDEX(pmc);

	ebb_state.pmc_enable[slot] = true;
}
/*
 * Public interface of the EBB test library.  Each function is declared
 * exactly once; event_ebb_init() and event_leader_ebb_init() were previously
 * declared twice.
 */
bool ebb_check_count(int pmc, u64 sample_period, int fudge);
void event_leader_ebb_init(struct event *e);
void event_ebb_init(struct event *e);
void event_bhrb_init(struct event *e, unsigned ifm);
void setup_ebb_handler(void (*callee)(void));
void standard_ebb_callee(void);
int ebb_event_enable(struct event *e);
void ebb_global_enable(void);
void ebb_global_disable(void);
void ebb_freeze_pmcs(void);
void ebb_unfreeze_pmcs(void);
int count_pmc(int pmc, uint32_t sample_period);
void dump_ebb_state(void);
void dump_summary_ebb_state(void);
void dump_ebb_hw_state(void);
void clear_ebb_stats(void);
void write_pmc(int pmc, u64 value);
u64 read_pmc(int pmc);
void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
void reset_ebb(void);
int ebb_check_mmcr0(void);
/* Sample period used by the handlers and tests */
extern u64 sample_period;
int core_busy_loop(void);
int ebb_child(union pipe read_pipe, union pipe write_pipe);
int catch_sigill(void (*func)(void));
void write_pmc1(void);
#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */

View file

@ -0,0 +1,365 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <ppc-asm.h>
#include "reg.h"
/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
#define r1 1
#define r2 2
/*
 * Hand-encoded instruction word emitted as the last instruction of
 * ebb_handler.  Presumably the rfebb (return from event-based branch)
 * encoding, spelled as .long for assemblers lacking the mnemonic - confirm
 * against the ISA.
 */
#define RFEBB .long 0x4c000924
/* Stack layout:
*
* ^
* User stack |
* Back chain ------+ <- r1 <-------+
* ... |
* Red zone / ABI Gap |
* ... |
* vr63 <+ |
* vr0 | |
* VSCR | |
* FSCR | |
* r31 | Save area |
* r0 | |
* XER | |
* CTR | |
* LR | |
* CCR <+ |
* ... <+ |
* LR | Caller frame |
* CCR | |
* Back chain <+ <- updated r1 --------+
*
*/
/*
 * Size of the gap left below the interrupted code's r1 before the save area
 * (the "Red zone / ABI Gap" in the layout above); it depends on the ABI.
 */
#if defined(_CALL_ELF) && _CALL_ELF == 2
#define ABIGAP 512
#else
#define ABIGAP 288
#endif
/* Register counts used to size the save area */
#define NR_GPR 32
/* CCR, LR, CTR, XER plus the FSCR_SAVE and VSCR_SAVE slots */
#define NR_SPR 6
#define NR_VSR 64
/* 8 bytes per GPR/SPR slot, 16 bytes per VSR slot */
#define SAVE_AREA ((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
#define CALLER_FRAME 112
#define STACK_FRAME (ABIGAP + SAVE_AREA + CALLER_FRAME)
/*
 * Offsets from the updated r1 of each save slot.  The save area starts
 * directly above the caller frame and the slots are laid out back to back.
 */
#define CCR_SAVE (CALLER_FRAME)
#define LR_SAVE (CCR_SAVE + 8)
#define CTR_SAVE (LR_SAVE + 8)
#define XER_SAVE (CTR_SAVE + 8)
#define GPR_SAVE(n) (XER_SAVE + 8 + (8 * n))
#define FSCR_SAVE (GPR_SAVE(31) + 8)
#define VSCR_SAVE (FSCR_SAVE + 8)
#define VSR_SAVE(n) (VSCR_SAVE + 8 + (16 * n))
/* Store / reload GPR n to / from its slot */
#define SAVE_GPR(n) std n,GPR_SAVE(n)(r1)
#define REST_GPR(n) ld n,GPR_SAVE(n)(r1)
/* Overwrite GPR n with a recognisable junk value (0xaaaa in the high half) */
#define TRASH_GPR(n) lis n,0xaaaa
/* Store / reload VSR n, using GPR b as scratch to hold the slot offset */
#define SAVE_VSR(n, b) li b, VSR_SAVE(n); stxvd2x n,b,r1
#define LOAD_VSR(n, b) li b, VSR_SAVE(n); lxvd2x n,b,r1
/* Build the 64-bit value of expr in reg, 16 bits at a time */
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher; \
rldicr reg,reg,32,31; \
oris reg,reg,(expr)@h; \
ori reg,reg,(expr)@l;
/*
 * ENTRY_POINT(name) opens the definition of a global function.
 * RESTORE_TOC(name) reloads r2 with this code's TOC pointer; how that is
 * done depends on the ABI.
 */
#if defined(_CALL_ELF) && _CALL_ELF == 2
#define ENTRY_POINT(name) \
.type FUNC_NAME(name),@function; \
.globl FUNC_NAME(name); \
FUNC_NAME(name):
#define RESTORE_TOC(name) \
/* Restore our TOC pointer using our entry point */ \
LOAD_REG_IMMEDIATE(r12, name) \
0: addis r2,r12,(.TOC.-0b)@ha; \
addi r2,r2,(.TOC.-0b)@l;
#else
#define ENTRY_POINT(name) FUNC_START(name)
#define RESTORE_TOC(name) \
/* Restore our TOC pointer via our opd entry */ \
LOAD_REG_IMMEDIATE(r2, name) \
ld r2,8(r2);
#endif
.text
/*
 * ebb_handler: entry point whose address setup_ebb_handler() programs into
 * EBBHR.
 *
 * It allocates a STACK_FRAME-sized frame, saves r0, LR, CR, CTR, XER, r2-r31,
 * the FSCR_SAVE/VSCR_SAVE slots and all 64 VSRs, overwrites r2-r12 and
 * r14-r31 with junk, restores the TOC, calls ebb_hook, then restores
 * everything and finishes with RFEBB.
 */
ENTRY_POINT(ebb_handler)
/* Allocate the frame; the old r1 is stored as the back chain */
stdu r1,-STACK_FRAME(r1)
/* r0 is saved first so it can be used as scratch for the SPR moves */
SAVE_GPR(0)
mflr r0
std r0,LR_SAVE(r1)
mfcr r0
std r0,CCR_SAVE(r1)
mfctr r0
std r0,CTR_SAVE(r1)
mfxer r0
std r0,XER_SAVE(r1)
/* r1 is the stack pointer and is restored by the final addi, so it has no SAVE_GPR */
SAVE_GPR(2)
SAVE_GPR(3)
SAVE_GPR(4)
SAVE_GPR(5)
SAVE_GPR(6)
SAVE_GPR(7)
SAVE_GPR(8)
SAVE_GPR(9)
SAVE_GPR(10)
SAVE_GPR(11)
SAVE_GPR(12)
SAVE_GPR(13)
SAVE_GPR(14)
SAVE_GPR(15)
SAVE_GPR(16)
SAVE_GPR(17)
SAVE_GPR(18)
SAVE_GPR(19)
SAVE_GPR(20)
SAVE_GPR(21)
SAVE_GPR(22)
SAVE_GPR(23)
SAVE_GPR(24)
SAVE_GPR(25)
SAVE_GPR(26)
SAVE_GPR(27)
SAVE_GPR(28)
SAVE_GPR(29)
SAVE_GPR(30)
SAVE_GPR(31)
/* VSR0 is saved before f0 is used as scratch below; r3 (already saved) holds the offset */
SAVE_VSR(0, r3)
mffs f0
stfd f0, FSCR_SAVE(r1)
/*
 * NOTE(review): mfvscr/mtvscr take a vector register operand.  'f0' is
 * assumed to assemble as plain register number 0 here - confirm that the
 * stfd/lfd of f0 around them really carries the VSCR value, and that vector
 * register 0 is not modified before SAVE_VSR(32) captures it.
 */
mfvscr f0
stfd f0, VSCR_SAVE(r1)
SAVE_VSR(1, r3)
SAVE_VSR(2, r3)
SAVE_VSR(3, r3)
SAVE_VSR(4, r3)
SAVE_VSR(5, r3)
SAVE_VSR(6, r3)
SAVE_VSR(7, r3)
SAVE_VSR(8, r3)
SAVE_VSR(9, r3)
SAVE_VSR(10, r3)
SAVE_VSR(11, r3)
SAVE_VSR(12, r3)
SAVE_VSR(13, r3)
SAVE_VSR(14, r3)
SAVE_VSR(15, r3)
SAVE_VSR(16, r3)
SAVE_VSR(17, r3)
SAVE_VSR(18, r3)
SAVE_VSR(19, r3)
SAVE_VSR(20, r3)
SAVE_VSR(21, r3)
SAVE_VSR(22, r3)
SAVE_VSR(23, r3)
SAVE_VSR(24, r3)
SAVE_VSR(25, r3)
SAVE_VSR(26, r3)
SAVE_VSR(27, r3)
SAVE_VSR(28, r3)
SAVE_VSR(29, r3)
SAVE_VSR(30, r3)
SAVE_VSR(31, r3)
SAVE_VSR(32, r3)
SAVE_VSR(33, r3)
SAVE_VSR(34, r3)
SAVE_VSR(35, r3)
SAVE_VSR(36, r3)
SAVE_VSR(37, r3)
SAVE_VSR(38, r3)
SAVE_VSR(39, r3)
SAVE_VSR(40, r3)
SAVE_VSR(41, r3)
SAVE_VSR(42, r3)
SAVE_VSR(43, r3)
SAVE_VSR(44, r3)
SAVE_VSR(45, r3)
SAVE_VSR(46, r3)
SAVE_VSR(47, r3)
SAVE_VSR(48, r3)
SAVE_VSR(49, r3)
SAVE_VSR(50, r3)
SAVE_VSR(51, r3)
SAVE_VSR(52, r3)
SAVE_VSR(53, r3)
SAVE_VSR(54, r3)
SAVE_VSR(55, r3)
SAVE_VSR(56, r3)
SAVE_VSR(57, r3)
SAVE_VSR(58, r3)
SAVE_VSR(59, r3)
SAVE_VSR(60, r3)
SAVE_VSR(61, r3)
SAVE_VSR(62, r3)
SAVE_VSR(63, r3)
/*
 * Overwrite r2-r12 and r14-r31 with junk so that a register missing from the
 * save/restore lists shows up as corruption in the interrupted code.
 * r13 is skipped (see the TLS comment below).
 */
TRASH_GPR(2)
TRASH_GPR(3)
TRASH_GPR(4)
TRASH_GPR(5)
TRASH_GPR(6)
TRASH_GPR(7)
TRASH_GPR(8)
TRASH_GPR(9)
TRASH_GPR(10)
TRASH_GPR(11)
TRASH_GPR(12)
TRASH_GPR(14)
TRASH_GPR(15)
TRASH_GPR(16)
TRASH_GPR(17)
TRASH_GPR(18)
TRASH_GPR(19)
TRASH_GPR(20)
TRASH_GPR(21)
TRASH_GPR(22)
TRASH_GPR(23)
TRASH_GPR(24)
TRASH_GPR(25)
TRASH_GPR(26)
TRASH_GPR(27)
TRASH_GPR(28)
TRASH_GPR(29)
TRASH_GPR(30)
TRASH_GPR(31)
/* r2 was just trashed; reload a valid TOC pointer before calling C code */
RESTORE_TOC(ebb_handler)
/*
* r13 is our TLS pointer. We leave whatever value was in there when the
* EBB fired. That seems to be OK because once set the TLS pointer is not
* changed - but presumably that could change in future.
*/
bl ebb_hook
nop
/* r2 may be changed here but we don't care */
/* Restore in the same order as the save: status slots, VSRs, SPRs, GPRs */
lfd f0, FSCR_SAVE(r1)
mtfsf 0xff,f0
lfd f0, VSCR_SAVE(r1)
mtvscr f0
LOAD_VSR(0, r3)
LOAD_VSR(1, r3)
LOAD_VSR(2, r3)
LOAD_VSR(3, r3)
LOAD_VSR(4, r3)
LOAD_VSR(5, r3)
LOAD_VSR(6, r3)
LOAD_VSR(7, r3)
LOAD_VSR(8, r3)
LOAD_VSR(9, r3)
LOAD_VSR(10, r3)
LOAD_VSR(11, r3)
LOAD_VSR(12, r3)
LOAD_VSR(13, r3)
LOAD_VSR(14, r3)
LOAD_VSR(15, r3)
LOAD_VSR(16, r3)
LOAD_VSR(17, r3)
LOAD_VSR(18, r3)
LOAD_VSR(19, r3)
LOAD_VSR(20, r3)
LOAD_VSR(21, r3)
LOAD_VSR(22, r3)
LOAD_VSR(23, r3)
LOAD_VSR(24, r3)
LOAD_VSR(25, r3)
LOAD_VSR(26, r3)
LOAD_VSR(27, r3)
LOAD_VSR(28, r3)
LOAD_VSR(29, r3)
LOAD_VSR(30, r3)
LOAD_VSR(31, r3)
LOAD_VSR(32, r3)
LOAD_VSR(33, r3)
LOAD_VSR(34, r3)
LOAD_VSR(35, r3)
LOAD_VSR(36, r3)
LOAD_VSR(37, r3)
LOAD_VSR(38, r3)
LOAD_VSR(39, r3)
LOAD_VSR(40, r3)
LOAD_VSR(41, r3)
LOAD_VSR(42, r3)
LOAD_VSR(43, r3)
LOAD_VSR(44, r3)
LOAD_VSR(45, r3)
LOAD_VSR(46, r3)
LOAD_VSR(47, r3)
LOAD_VSR(48, r3)
LOAD_VSR(49, r3)
LOAD_VSR(50, r3)
LOAD_VSR(51, r3)
LOAD_VSR(52, r3)
LOAD_VSR(53, r3)
LOAD_VSR(54, r3)
LOAD_VSR(55, r3)
LOAD_VSR(56, r3)
LOAD_VSR(57, r3)
LOAD_VSR(58, r3)
LOAD_VSR(59, r3)
LOAD_VSR(60, r3)
LOAD_VSR(61, r3)
LOAD_VSR(62, r3)
LOAD_VSR(63, r3)
/* SPRs go back via r0, which is itself restored last of the scratch users */
ld r0,XER_SAVE(r1)
mtxer r0
ld r0,CTR_SAVE(r1)
mtctr r0
ld r0,LR_SAVE(r1)
mtlr r0
ld r0,CCR_SAVE(r1)
mtcr r0
REST_GPR(0)
REST_GPR(2)
REST_GPR(3)
REST_GPR(4)
REST_GPR(5)
REST_GPR(6)
REST_GPR(7)
REST_GPR(8)
REST_GPR(9)
REST_GPR(10)
REST_GPR(11)
REST_GPR(12)
REST_GPR(13)
REST_GPR(14)
REST_GPR(15)
REST_GPR(16)
REST_GPR(17)
REST_GPR(18)
REST_GPR(19)
REST_GPR(20)
REST_GPR(21)
REST_GPR(22)
REST_GPR(23)
REST_GPR(24)
REST_GPR(25)
REST_GPR(26)
REST_GPR(27)
REST_GPR(28)
REST_GPR(29)
REST_GPR(30)
REST_GPR(31)
/* Pop the frame and return to the interrupted code */
addi r1,r1,STACK_FRAME
RFEBB
FUNC_END(ebb_handler)

View file

@ -0,0 +1,86 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "ebb.h"
/*
* Tests we can setup an EBB on our child. Nothing interesting happens, because
* even though the event is enabled and running the child hasn't enabled the
* actual delivery of the EBBs.
*/
/*
 * Child body: performs two wait/notify handshakes with the parent (the
 * parent creates the EBB event between them), writes PMC1 and then spins
 * for a million iterations before exiting with 0.
 */
static int victim_child(union pipe read_pipe, union pipe write_pipe)
{
	int spin = 0;

	/* First handshake */
	FAIL_IF(wait_for_parent(read_pipe));
	FAIL_IF(notify_parent(write_pipe));

	/* Second handshake: the parent has created the EBB event by now */
	FAIL_IF(wait_for_parent(read_pipe));
	FAIL_IF(notify_parent(write_pipe));

	/* Check the EBB is enabled by writing PMC1 */
	write_pmc1();

	/* Stay alive for a while with the EBB event enabled */
	while (spin < 1000000)
		spin++;

	return 0;
}
/*
 * Parent side: fork a child, attach a "cycles" EBB leader event to it,
 * enable the event and check that the child exits cleanly.
 */
int ebb_on_child(void)
{
	union pipe read_pipe, write_pipe;
	struct event event;
	pid_t pid;

	FAIL_IF(pipe(read_pipe.fds) == -1);
	FAIL_IF(pipe(write_pipe.fds) == -1);

	pid = fork();
	if (pid == 0) {
		/* The child's read end is our write end and vice versa */
		int child_rc = victim_child(write_pipe, read_pipe);

		exit(child_rc);
	}

	/* Once this returns the child is up and running */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

	event_init_named(&event, 0x1001e, "cycles");
	event_leader_ebb_init(&event);
	event.attr.exclude_idle = 1;
	event.attr.exclude_hv = 1;
	event.attr.exclude_kernel = 1;

	FAIL_IF(event_open_with_pid(&event, pid));
	FAIL_IF(ebb_event_enable(&event));

	FAIL_IF(sync_with_child(read_pipe, write_pipe));

	/* The child is expected to exit without error */
	FAIL_IF(wait_for_child(pid));

	event_close(&event);

	return 0;
}
/* Run ebb_on_child under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(ebb_on_child, "ebb_on_child");

	return rc;
}

View file

@ -0,0 +1,92 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "ebb.h"
/*
* Tests we can setup an EBB on our child. The child expects this and enables
* EBBs, which are then delivered to the child, even though the event is
* created by the parent.
*/
/*
 * Child body: installs the standard EBB handler for PMC1 and enables EBBs
 * before telling the parent it is ready, then spins until 20 EBBs have been
 * taken.  Finally it disables EBBs, freezes and harvests PMC1 and dumps
 * state.
 */
static int victim_child(union pipe read_pipe, union pipe write_pipe)
{
	FAIL_IF(wait_for_parent(read_pipe));

	/* The handler must be in place before the parent creates the event */
	ebb_enable_pmc_counting(1);
	setup_ebb_handler(standard_ebb_callee);
	ebb_global_enable();

	FAIL_IF(notify_parent(write_pipe));

	for (;;) {
		if (ebb_state.stats.ebb_count >= 20)
			break;
		FAIL_IF(core_busy_loop());
	}

	ebb_global_disable();
	ebb_freeze_pmcs();

	count_pmc(1, sample_period);
	dump_ebb_state();

	FAIL_IF(ebb_state.stats.ebb_count == 0);

	return 0;
}
/*
 * Tests we can set up an EBB on our child when the child is expecting it:
 * the child installs its handler first, then the parent creates and enables
 * the event and waits for the child to finish.
 */
int ebb_on_willing_child(void)
{
	union pipe read_pipe, write_pipe;
	struct event event;
	pid_t pid;

	FAIL_IF(pipe(read_pipe.fds) == -1);
	FAIL_IF(pipe(write_pipe.fds) == -1);

	pid = fork();
	if (pid == 0) {
		/* The child's read end is our write end and vice versa */
		int child_rc = victim_child(write_pipe, read_pipe);

		exit(child_rc);
	}

	/* Tell the child to install its EBB handler; it is running after this */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

	event_init_named(&event, 0x1001e, "cycles");
	event_leader_ebb_init(&event);
	event.attr.exclude_idle = 1;
	event.attr.exclude_hv = 1;
	event.attr.exclude_kernel = 1;

	FAIL_IF(event_open_with_pid(&event, pid));
	FAIL_IF(ebb_event_enable(&event));

	/* The child should now take EBBs and then exit */
	FAIL_IF(wait_for_child(pid));

	event_close(&event);

	return 0;
}
/* Run ebb_on_willing_child under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(ebb_on_willing_child, "ebb_on_willing_child");

	return rc;
}

View file

@ -0,0 +1,86 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "ebb.h"
/*
* Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
* event off the PMU.
*/
/*
 * Initialise, open and enable a PM_RUN_INST_CMPL event bound to the given
 * cpu.  The test is skipped if require_paranoia_below(1) reports a problem.
 *
 * Returns 0 on success; failures and skips return through FAIL_IF() and
 * SKIP_IF().
 */
static int setup_cpu_event(struct event *event, int cpu)
{
	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");

	event->attr.exclude_idle = 1;
	event->attr.exclude_hv = 1;
	event->attr.exclude_kernel = 1;

	SKIP_IF(require_paranoia_below(1));

	FAIL_IF(event_open_with_cpu(event, cpu));
	FAIL_IF(event_enable(event));

	return 0;
}
/*
 * Pin to one cpu, let a forked child install an EBB event there, then open
 * a cpu-wide event on the same cpu.  After the child finishes, the cpu
 * event's enabled/running times are compared.
 */
int ebb_vs_cpu_event(void)
{
	union pipe read_pipe, write_pipe;
	struct event event;
	pid_t pid;
	int rc;
	int cpu = pick_online_cpu();

	FAIL_IF(cpu < 0);
	FAIL_IF(bind_to_cpu(cpu));

	FAIL_IF(pipe(read_pipe.fds) == -1);
	FAIL_IF(pipe(write_pipe.fds) == -1);

	pid = fork();
	if (pid == 0) {
		/* The child's read end is our write end and vice versa */
		int child_rc = ebb_child(write_pipe, read_pipe);

		exit(child_rc);
	}

	/* Let the child install its EBB event, and wait until it has */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

	/* Now try to install our CPU event; on failure reap the child first */
	rc = setup_cpu_event(&event, cpu);
	if (rc != 0) {
		kill_child_and_wait(pid);
		return rc;
	}

	/* Release the child to run ... */
	FAIL_IF(sync_with_child(read_pipe, write_pipe));

	/* ... and wait for it to complete */
	FAIL_IF(wait_for_child(pid));

	FAIL_IF(event_disable(&event));
	FAIL_IF(event_read(&event));

	event_report(&event);

	/* The cpu event may have run, but we don't expect 100% */
	FAIL_IF(event.result.enabled >= event.result.running);

	return 0;
}
/* Run ebb_vs_cpu_event under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");

	return rc;
}

View file

@ -0,0 +1,131 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <stdio.h>
#include <stdlib.h>
#include "ebb.h"
/*
* Test various attributes of the EBB event are enforced.
*/
/*
 * Walk through a series of event configurations and check that each one is
 * accepted or rejected by event_open*() as expected.  Each step
 * re-initialises the event from scratch; steps that are expected to succeed
 * close what they opened before moving on.
 *
 * Returns 0 if every expectation holds; otherwise returns through FAIL_IF()
 * or SKIP_IF().
 */
int event_attributes(void)
{
struct event event, leader;
/* Baseline: a plain EBB leader event */
event_init(&event, 0x1001e);
event_leader_ebb_init(&event);
/* Expected to succeed */
FAIL_IF(event_open(&event));
event_close(&event);
event_init(&event, 0x001e); /* CYCLES - no PMC specified */
event_leader_ebb_init(&event);
/* Expected to fail, no PMC specified */
FAIL_IF(event_open(&event) == 0);
event_init(&event, 0x2001e);
event_leader_ebb_init(&event);
event.attr.exclusive = 0;
/* Expected to fail, not exclusive */
FAIL_IF(event_open(&event) == 0);
event_init(&event, 0x3001e);
event_leader_ebb_init(&event);
event.attr.freq = 1;
/* Expected to fail, sets freq */
FAIL_IF(event_open(&event) == 0);
event_init(&event, 0x4001e);
event_leader_ebb_init(&event);
event.attr.sample_period = 1;
/* Expected to fail, sets sample_period */
FAIL_IF(event_open(&event) == 0);
event_init(&event, 0x1001e);
event_leader_ebb_init(&event);
event.attr.enable_on_exec = 1;
/* Expected to fail, sets enable_on_exec */
FAIL_IF(event_open(&event) == 0);
event_init(&event, 0x1001e);
event_leader_ebb_init(&event);
event.attr.inherit = 1;
/* Expected to fail, sets inherit */
FAIL_IF(event_open(&event) == 0);
/* Group checks: an EBB leader with an EBB member */
event_init(&leader, 0x1001e);
event_leader_ebb_init(&leader);
FAIL_IF(event_open(&leader));
event_init(&event, 0x20002);
event_ebb_init(&event);
/* Expected to succeed */
FAIL_IF(event_open_with_group(&event, leader.fd));
event_close(&leader);
event_close(&event);
/* EBB leader with a member that does not request EBB */
event_init(&leader, 0x1001e);
event_leader_ebb_init(&leader);
FAIL_IF(event_open(&leader));
event_init(&event, 0x20002);
/* Expected to fail, event doesn't request EBB, leader does */
FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
event_close(&leader);
/* Leader without the EBB flag, member with it */
event_init(&leader, 0x1001e);
event_leader_ebb_init(&leader);
/* Clear the EBB flag */
leader.attr.config &= ~(1ull << 63);
FAIL_IF(event_open(&leader));
event_init(&event, 0x20002);
event_ebb_init(&event);
/* Expected to fail, leader doesn't request EBB */
FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
event_close(&leader);
event_init(&leader, 0x1001e);
event_leader_ebb_init(&leader);
leader.attr.exclusive = 0;
/* Expected to fail, leader isn't exclusive */
FAIL_IF(event_open(&leader) == 0);
event_init(&leader, 0x1001e);
event_leader_ebb_init(&leader);
leader.attr.pinned = 0;
/* Expected to fail, leader isn't pinned */
FAIL_IF(event_open(&leader) == 0);
event_init(&event, 0x1001e);
event_leader_ebb_init(&event);
/* Expected to fail, not a task event */
SKIP_IF(require_paranoia_below(1));
FAIL_IF(event_open_with_cpu(&event, 0) == 0);
return 0;
}
/* Run event_attributes under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(event_attributes, "event_attributes");

	return rc;
}

View file

@ -0,0 +1,43 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <ppc-asm.h>
.text
FUNC_START(thirty_two_instruction_loop)
cmpwi r3,0
beqlr
addi r4,r3,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1
addi r4,r4,1 # 28 addi's
subi r3,r3,1
b FUNC_NAME(thirty_two_instruction_loop)
FUNC_END(thirty_two_instruction_loop)

View file

@ -0,0 +1,79 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <setjmp.h>
#include <signal.h>
#include "ebb.h"
/*
* Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
* are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
* the PMU.
*/
/* Event opened by the parent before fork; the child still reads from it. */
static struct event event;

/*
 * Child-side checks after fork: BESCR, EBBHR and EBBRR must all read as
 * zero, writing PMC1 must raise SIGILL, and reading the parent's event must
 * still succeed.
 */
static int child(void)
{
	/* Even though we have EBE=0 we can still see the EBB regs */
	FAIL_IF(mfspr(SPRN_BESCR) != 0);
	FAIL_IF(mfspr(SPRN_EBBHR) != 0);
	FAIL_IF(mfspr(SPRN_EBBRR) != 0);

	/* catch_sigill() returns 0 only when the write took a SIGILL */
	FAIL_IF(catch_sigill(write_pmc1));

	/* We can still read from the event, though it is on our parent */
	FAIL_IF(event_read(&event));

	return 0;
}
/*
 * Tests that fork clears EBB state: the parent sets up a fully enabled EBB
 * event with a frozen, primed PMC1, forks, and lets the child verify that
 * none of that state is visible to it.
 */
int fork_cleanup(void)
{
	pid_t pid;

	event_init_named(&event, 0x1001e, "cycles");
	event_leader_ebb_init(&event);
	FAIL_IF(event_open(&event));

	ebb_enable_pmc_counting(1);
	setup_ebb_handler(standard_ebb_callee);
	ebb_global_enable();

	FAIL_IF(ebb_event_enable(&event));

	/* Freeze the counters and prime PMC1; no EBB needs to be taken */
	mtspr(SPRN_MMCR0, MMCR0_FC);
	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));

	pid = fork();
	if (pid == 0) {
		int child_rc = child();

		exit(child_rc);
	}

	/* The child does the actual testing */
	FAIL_IF(wait_for_child(pid));

	/* After fork */
	event_close(&event);

	return 0;
}
/* Run fork_cleanup under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(fork_cleanup, "fork_cleanup");

	return rc;
}

View file

@ -0,0 +1,164 @@
/*
* Copyright 2014, Michael Ellerman, IBM Corp.
* Licensed under GPLv2.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
#include "ebb.h"
/*
* Run a calibrated instruction loop and count instructions executed using
* EBBs. Make sure the counts look right.
*/
/* Calibrated loop; it is not defined in this file. */
extern void thirty_two_instruction_loop(uint64_t loops);
/*
 * Tracks whether do_count_loop() currently has the counters frozen.
 * pmc4_ebb_callee() reads it to decide how to reset the EBB.
 */
static bool counters_frozen = true;
/*
 * Run the calibrated loop for roughly `instructions` instructions with the
 * counters unfrozen, then compare what PMC4 counted against the expectation.
 *
 * event:        receives the measured count in event->result.value.
 * instructions: requested instruction count; the loop is run for
 *               instructions >> 5 passes.
 * overhead:     instructions expected on top of the loop body.
 * report:       when true, print the expected/actual figures.
 *
 * Returns 0 if the measured count is within 0.0001 % of
 * instructions + overhead, and -1 otherwise.  A measured count of zero is
 * only accepted when the expectation is also zero; it is handled separately
 * so that the tolerance check never divides by zero.
 */
static int do_count_loop(struct event *event, uint64_t instructions,
			 uint64_t overhead, bool report)
{
	int64_t difference, expected;
	double percentage;

	clear_ebb_stats();

	counters_frozen = false;
	mb();
	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);

	thirty_two_instruction_loop(instructions >> 5);

	counters_frozen = true;
	mb();
	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);

	/* Pick up whatever PMC4 counted since the last EBB */
	count_pmc(4, sample_period);

	event->result.value = ebb_state.stats.pmc_count[4-1];
	expected = instructions + overhead;
	difference = event->result.value - expected;

	/* Only meaningful when something was counted */
	if (event->result.value)
		percentage = (double)difference / event->result.value * 100;
	else
		percentage = 0.0;

	if (report) {
		printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
		printf("Expected %lu\n", expected);
		printf("Actual %llu\n", event->result.value);
		printf("Error %ld, %f%%\n", difference, percentage);
		printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
	}

	if (difference < 0)
		difference = -difference;

	/* Nothing counted: within tolerance only if nothing was expected */
	if (event->result.value == 0)
		return difference ? -1 : 0;

	/* Tolerate a difference of up to 0.0001 % */
	difference *= 10000 * 100;
	if (difference / event->result.value)
		return -1;

	return 0;
}
/*
 * Count how many instructions it takes to do a null loop.
 *
 * Runs the zero-length loop once for an initial figure and then 100 more
 * times, keeping (and announcing) the smallest count seen.
 */
static uint64_t determine_overhead(struct event *event)
{
	uint64_t best;
	int remaining;

	do_count_loop(event, 0, 0, false);
	best = event->result.value;

	for (remaining = 100; remaining > 0; remaining--) {
		uint64_t sample;

		do_count_loop(event, 0, 0, false);
		sample = event->result.value;

		if (sample >= best)
			continue;

		printf("Replacing overhead %lu with %lu\n", best, sample);
		best = sample;
	}

	return best;
}
/*
 * EBB callee for this test: counts the EBB and harvests PMC4 when
 * BESCR_PMEO is set, otherwise records a spurious EBB.  While
 * counters_frozen is true the EBB is reset with MMCR0_PMAO as the clear
 * mask; otherwise the plain reset_ebb() is used.
 */
static void pmc4_ebb_callee(void)
{
	uint64_t bescr = mfspr(SPRN_BESCR);

	if (bescr & BESCR_PMEO) {
		ebb_state.stats.ebb_count++;
		count_pmc(4, sample_period);
	} else {
		ebb_state.stats.spurious++;
	}

	if (!counters_frozen)
		reset_ebb();
	else
		reset_ebb_with_clear_mask(MMCR0_PMAO);
}
/*
 * Open a PM_RUN_INST_CMPL EBB event, measure the overhead of an empty loop
 * and then check the instruction count for a series of increasingly long
 * calibrated loops.
 */
int instruction_count(void)
{
	/* Run lengths: 1M, 10M, 100M, 1G, 16G, 64G and 128G instructions */
	static const uint64_t run_lengths[] = {
		0x100000,
		0xa00000,
		0x6400000,
		0x40000000,
		0x400000000,
		0x1000000000,
		0x2000000000,
	};
	struct event event;
	uint64_t overhead;
	unsigned int i;

	event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
	event_leader_ebb_init(&event);
	event.attr.exclude_idle = 1;
	event.attr.exclude_hv = 1;
	event.attr.exclude_kernel = 1;

	FAIL_IF(event_open(&event));
	FAIL_IF(ebb_event_enable(&event));

	sample_period = COUNTER_OVERFLOW;

	setup_ebb_handler(pmc4_ebb_callee);
	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
	ebb_global_enable();

	overhead = determine_overhead(&event);
	printf("Overhead of null loop: %lu instructions\n", overhead);

	for (i = 0; i < sizeof(run_lengths) / sizeof(run_lengths[0]); i++)
		FAIL_IF(do_count_loop(&event, run_lengths[i], overhead, true));

	ebb_global_disable();
	event_close(&event);

	printf("Finished OK\n");

	return 0;
}
/* Run instruction_count under the selftest harness and return its result. */
int main(void)
{
	int rc = test_harness(instruction_count, "instruction_count");

	return rc;
}

Some files were not shown because too many files have changed in this diff Show more