Fixed MTP to work with TWRP

2025-09-07 16:58:04 -04:00 · 2018-06-19 23:16:04 +02:00 · 2018-06-19 23:16:04 +02:00 · f6dfaef42e
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
--- a/tools/testing/fault-injection/failcmd.sh
+++ b/tools/testing/fault-injection/failcmd.sh
@ -0,0 +1,219 @@
+#!/bin/bash
+#
+# NAME
+#	failcmd.sh - run a command with injecting slab/page allocation failures
+#
+# SYNOPSIS
+#	failcmd.sh --help
+#	failcmd.sh [<options>] command [arguments]
+#
+# DESCRIPTION
+#	Run command with injecting slab/page allocation failures by fault
+#	injection.
+#
+#	NOTE: you need to run this script as root.
+#
+
+# Print the option summary to stderr. The here-document delimiter is
+# unquoted, so $0 in the text expands to the name the script was invoked as.
+usage()
+{
+	cat >&2 <<EOF
+Usage: $0 [options] command [arguments]
+
+OPTIONS
+	-p percent
+	--probability=percent
+		likelihood of failure injection, in percent.
+		Default value is 1
+
+	-t value
+	--times=value
+		specifies how many times failures may happen at most.
+		Default value is 1
+
+	--oom-kill-allocating-task=value
+		set /proc/sys/vm/oom_kill_allocating_task to specified value
+		before running the command.
+		Default value is 1
+
+	-h, --help
+		Display a usage message and exit
+
+	--interval=value, --space=value, --verbose=value, --task-filter=value,
+	--stacktrace-depth=value, --require-start=value, --require-end=value,
+	--reject-start=value, --reject-end=value, --ignore-gfp-wait=value
+		See Documentation/fault-injection/fault-injection.txt for more
+		information
+
+	failslab options:
+	--cache-filter=value
+
+	fail_page_alloc options:
+	--ignore-gfp-highmem=value, --min-order=value
+
+ENVIRONMENT
+	FAILCMD_TYPE
+		The following values for FAILCMD_TYPE are recognized:
+
+		failslab
+			inject slab allocation failures
+		fail_page_alloc
+			inject page allocation failures
+
+		If FAILCMD_TYPE is not defined, then failslab is used.
+EOF
+}
+
+# The fault attributes and /proc/sys/vm knobs written below need root.
+if [ "$UID" != 0 ]; then
+	echo must be run as root >&2
+	exit 1
+fi
+
+# Mount point (third field of the first line printed by "mount -t debugfs").
+DEBUGFS=$(mount -t debugfs | head -1 | awk '{ print $3}')
+
+if [ ! -d "$DEBUGFS" ]; then
+	echo debugfs is not mounted >&2
+	exit 1
+fi
+
+# Fault type to drive; its attribute directory lives directly under debugfs.
+FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
+FAULTATTR=$DEBUGFS/$FAILCMD_TYPE
+
+# Quoted so that an odd FAILCMD_TYPE (whitespace, glob characters) is
+# rejected here instead of making "[" fail and the check fall through.
+if [ ! -d "$FAULTATTR" ]; then
+	echo "$FAILCMD_TYPE is not available" >&2
+	exit 1
+fi
+
+# Long options accepted for every fault type.
+LONGOPTS="probability:,interval:,times:,space:,verbose:,task-filter:"
+LONGOPTS="${LONGOPTS},stacktrace-depth:,require-start:,require-end:"
+LONGOPTS="${LONGOPTS},reject-start:,reject-end:,oom-kill-allocating-task:,help"
+
+# Add the long options that only make sense for the selected fault type.
+if [ $FAILCMD_TYPE = fail_page_alloc ]; then
+	LONGOPTS="${LONGOPTS},ignore-gfp-wait:,ignore-gfp-highmem:,min-order:"
+elif [ $FAILCMD_TYPE = failslab ]; then
+	LONGOPTS="${LONGOPTS},ignore-gfp-wait:,cache-filter:"
+fi
+
+# Let getopt normalise the command line; on a parse error print the usage
+# text and give up.
+if ! TEMP=$(getopt -o p:i:t:s:v:h --long $LONGOPTS -n 'failcmd.sh' -- "$@"); then
+	usage
+	exit 1
+fi
+
+# Replace the positional parameters with getopt's normalised list.
+eval set -- "$TEMP"
+
+# Write the baseline values into the three attributes this script always
+# manages: task-filter=N, probability=0, times=1.
+fault_attr_default()
+{
+	local setting
+
+	for setting in task-filter=N probability=0 times=1; do
+		echo ${setting#*=} > $FAULTATTR/${setting%%=*}
+	done
+}
+
+# Start from the baseline before any option is applied.
+fault_attr_default
+
+# Remember the current sysctl value so restore_values can put it back.
+oom_kill_allocating_task_saved=$(cat /proc/sys/vm/oom_kill_allocating_task)
+
+# Undo what this script changed: baseline fault attributes plus the saved
+# oom_kill_allocating_task value.
+restore_values()
+{
+	fault_attr_default
+	echo $oom_kill_allocating_task_saved > /proc/sys/vm/oom_kill_allocating_task
+}
+
+#
+# Default options
+#
+declare -i oom_kill_allocating_task=1
+declare task_filter=Y
+declare -i probability=1
+declare -i times=1
+
+while true; do
+	case "$1" in
+	-p|--probability)
+		probability=$2
+		shift 2
+		;;
+	-i|--interval)
+		echo $2 > $FAULTATTR/interval
+		shift 2
+		;;
+	-t|--times)
+		times=$2
+		shift 2
+		;;
+	-s|--space)
+		echo $2 > $FAULTATTR/space
+		shift 2
+		;;
+	-v|--verbose)
+		echo $2 > $FAULTATTR/verbose
+		shift 2
+		;;
+	--task-filter)
+		task_filter=$2
+		shift 2
+		;;
+	--stacktrace-depth)
+		echo $2 > $FAULTATTR/stacktrace-depth
+		shift 2
+		;;
+	--require-start)
+		echo $2 > $FAULTATTR/require-start
+		shift 2
+		;;
+	--require-end)
+		echo $2 > $FAULTATTR/require-end
+		shift 2
+		;;
+	--reject-start)
+		echo $2 > $FAULTATTR/reject-start
+		shift 2
+		;;
+	--reject-end)
+		echo $2 > $FAULTATTR/reject-end
+		shift 2
+		;;
+	--oom-kill-allocating-task)
+		oom_kill_allocating_task=$2
+		shift 2
+		;;
+	--ignore-gfp-wait)
+		echo $2 > $FAULTATTR/ignore-gfp-wait
+		shift 2
+		;;
+	--cache-filter)
+		echo $2 > $FAULTATTR/cache_filter
+		shift 2
+		;;
+	--ignore-gfp-highmem)
+		echo $2 > $FAULTATTR/ignore-gfp-highmem
+		shift 2
+		;;
+	--min-order)
+		echo $2 > $FAULTATTR/min-order
+		shift 2
+		;;
+	-h|--help)
+		usage
+		exit 0
+		shift
+		;;
+	--)
+		shift
+		break
+		;;
+	esac
+done
+
+[ -z "$1" ] && exit 0
+
+echo $oom_kill_allocating_task > /proc/sys/vm/oom_kill_allocating_task
+echo $task_filter > $FAULTATTR/task-filter
+echo $probability > $FAULTATTR/probability
+echo $times > $FAULTATTR/times
+
+trap "restore_values" SIGINT SIGTERM EXIT
+
+cmd="echo 1 > /proc/self/make-it-fail && exec $@"
+bash -c "$cmd"
--- a/tools/testing/ktest/compare-ktest-sample.pl
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@ -0,0 +1,32 @@
+#!/usr/bin/perl
+
+# Report option names that show up in only one of ktest.pl and sample.conf.
+# Both files are opened relative to the current directory; a file that cannot
+# be opened is a fatal error instead of silently yielding an empty name set.
+
+# Collect the option names referenced by ktest.pl.
+open (IN,"ktest.pl") or die "Can't open ktest.pl: $!\n";
+while (<IN>) {
+    # hashes are now used
+    if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+	/^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
+	/set_test_option\("(.*?)"/) {
+	$opt{$1} = 1;
+    }
+}
+close IN;
+
+# Collect the names assigned in sample.conf, including lines that are
+# commented out with a leading '#'.
+open (IN, "sample.conf") or die "Can't open sample.conf: $!\n";
+while (<IN>) {
+    if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
+	$samp{$1} = 1;
+    }
+}
+close IN;
+
+# Names found in ktest.pl but not in sample.conf.
+foreach $opt (keys %opt) {
+    if (!defined($samp{$opt})) {
+	print "opt = $opt\n";
+    }
+}
+
+# Names found in sample.conf but not in ktest.pl.
+foreach $samp (keys %samp) {
+    if (!defined($opt{$samp})) {
+	print "samp = $samp\n";
+    }
+}
--- a/tools/testing/ktest/examples/README
+++ b/tools/testing/ktest/examples/README
@ -0,0 +1,32 @@
+This directory contains example configs to use ktest for various tasks.
+The configs still need to be customized for your environment, but it
+is broken up by task which makes it easier to understand how to set up
+ktest.
+
+The configs are based off of real working configs but have been modified
+and commented to show more generic use cases that are more helpful for
+developers.
+
+crosstests.conf - this config shows an example of testing a git repo against
+    lots of different architectures. It only does build tests, but makes
+    it easy to compile test different archs. You can download the arch
+    cross compilers from:
+  http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+
+test.conf - A generic example of a config. This is based on an actual config
+     used to perform real testing.
+
+kvm.conf - An example of a config that is used to test a virtual guest running
+     on a host.
+
+snowball.conf - An example config that was used to demo ktest.pl against
+     a snowball ARM board.
+
+include/  -  The include directory holds default configs that can be
+    included into other configs. This is a real use example that shows how
+    to reuse configs for various machines or set ups. The files here
+    are included by other config files, where the other config files define
+    options and variables that will make the included config work for the
+    given environment.
+
+
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@ -0,0 +1,254 @@
+#
+# Example config for cross compiling
+#
+# In this config, it is expected that the tool chains from:
+#
+#   http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#
+# running on a x86_64 system have been downloaded and installed into:
+#
+#   /usr/local/
+#
+# such that the compiler binaries are something like:
+#
+#   /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc
+#
+# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2
+# this config uses variables to differentiate them.
+# 
+# Comments describe some of the options, but full descriptions of
+# options are described in the sample.conf file.
+
+# ${PWD} is defined by ktest.pl to be the directory that the user
+# was in when they executed ktest.pl. It may be better to hardcode the
+# path name here. THIS_DIR is the variable used through out the config file
+# in case you want to change it.
+
+THIS_DIR := ${PWD}
+
+# Update the BUILD_DIR option to the location of your git repo you want to test.
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# The build will go into this directory. It will be created when you run the test.
+OUTPUT_DIR = ${THIS_DIR}/cross-compile
+
+# The build will be compiled with -j8
+BUILD_OPTIONS = -j8
+
+# The test will not stop when it hits a failure.
+DIE_ON_FAILURE = 0
+
+# If you want to have ktest.pl store the failure somewhere, uncomment this option
+# and change the directory where ktest should store the failures.
+#STORE_FAILURES = ${THIS_DIR}/failures
+
+# The log file is stored in the OUTPUT_DIR called cross.log
+# If you enable this, you need to create the OUTPUT_DIR. It won't be created for you.
+LOG_FILE = ${OUTPUT_DIR}/cross.log
+
+# The log file will be cleared each time you run ktest.
+CLEAR_LOG = 1
+
+# As some archs do not build with the defconfig, they have been marked
+# to be ignored. If you want to test them anyway, change DO_FAILED to 1.
+# If a test that has been marked as DO_FAILED passes, then you should change
+# that test to be DO_DEFAULT
+
+DO_FAILED := 0
+DO_DEFAULT := 1
+
+# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single
+# arch that you want to test. (uncomment RUN and choose your arch)
+#RUN := m32r
+
+# At the bottom of the config file exists a bisect test. You can update that
+# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable
+# to run the bisect on the arch.
+#RUN := bisect
+
+# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1
+# and they select that in the test.
+# Note: GCC_VER is declared as an option and not a variable ('=' instead of ':=')
+# This is important. A variable is used only in the config file and if it is set
+# it stays that way for the rest of the config file until it is changed again.
+# Here we want GCC_VER to remain persistent and change for each test, as it is used in
+# the MAKE_CMD. By using '=' instead of ':=' we achieve our goal.
+
+GCC_VER = 4.5.2
+MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH}
+
+# all tests are only doing builds.
+TEST_TYPE = build
+
+# If you want to add configs on top of the defconfig, you can add those configs into
+# the add-config file and uncomment this option. This is useful if you want to test
+# all cross compiles with PREEMPT set, or TRACING on, etc.
+#ADD_CONFIG = ${THIS_DIR}/add-config
+
+# All tests are using defconfig
+BUILD_TYPE = defconfig
+
+# The test names will have the arch and cross compiler used. This will be shown in
+# the results.
+TEST_NAME = ${ARCH} ${CROSS}
+
+# alpha
+TEST_START IF ${RUN} == alpha || ${DO_DEFAULT}
+# Notice that CROSS and ARCH are also options and not variables (again '=' instead
+# of ':='). This is because TEST_NAME and MAKE_CMD will use them for each test.
+# Only options are available during runs. Variables are only present in parsing the
+# config file.
+CROSS = alpha-linux
+ARCH = alpha
+
+# arm
+TEST_START IF ${RUN} == arm || ${DO_DEFAULT}
+CROSS = arm-unknown-linux-gnueabi
+ARCH = arm
+
+# black fin
+TEST_START IF ${RUN} == bfin || ${DO_DEFAULT}
+CROSS = bfin-uclinux
+ARCH = blackfin
+BUILD_OPTIONS = -j8 vmlinux
+
+# cris - FAILS?
+TEST_START IF ${RUN} == cris || ${RUN} == cris64 || ${DO_FAILED}
+CROSS = cris-linux
+ARCH = cris
+
+# cris32 - not right arch?
+TEST_START IF ${RUN} == cris || ${RUN} == cris32 || ${DO_FAILED}
+CROSS = crisv32-linux
+ARCH = cris
+
+# ia64
+TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT}
+CROSS = ia64-linux
+ARCH = ia64
+
+# frv
+TEST_START IF ${RUN} == frv || ${DO_FAILED}
+CROSS = frv-linux
+ARCH = frv
+GCC_VER = 4.5.1
+
+# m68k fails with error?
+TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
+CROSS = m68k-linux
+ARCH = m68k
+
+# mips64
+TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT}
+CROSS = mips64-linux
+ARCH = mips
+
+# mips32
+TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT}
+CROSS = mips-linux
+ARCH = mips
+
+# m32r
+TEST_START IF ${RUN} == m32r || ${DO_FAILED}
+CROSS = m32r-linux
+ARCH = m32r
+GCC_VER = 4.5.1
+BUILD_OPTIONS = -j8 vmlinux
+
+# parisc64 failed?
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED}
+CROSS = hppa64-linux
+ARCH = parisc
+
+# parisc
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED}
+CROSS = hppa-linux
+ARCH = parisc
+
+# ppc
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT}
+CROSS = powerpc-linux
+ARCH = powerpc
+
+# ppc64
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT}
+CROSS = powerpc64-linux
+ARCH = powerpc
+
+# s390
+TEST_START IF ${RUN} == s390 || ${DO_DEFAULT}
+CROSS = s390x-linux
+ARCH = s390
+
+# sh
+TEST_START IF ${RUN} == sh || ${DO_DEFAULT}
+CROSS = sh4-linux
+ARCH = sh
+
+# sparc64
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT}
+CROSS = sparc64-linux
+ARCH = sparc64
+
+# sparc
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT}
+CROSS = sparc-linux
+ARCH = sparc
+
+# xtensa failed
+TEST_START IF ${RUN} == xtensa || ${DO_FAILED}
+CROSS = xtensa-linux
+ARCH = xtensa
+
+# UML
+TEST_START IF ${RUN} == uml || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=um SUBARCH=x86_64
+ARCH = uml
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=i386
+ARCH = i386
+CROSS = 
+
+TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=x86_64
+ARCH = x86_64
+CROSS = 
+
+#################################
+
+# This is a bisect if needed. You need to give it a MIN_CONFIG that
+# will be the config file it uses. Basically, just copy the created defconfig
+# for the arch someplace and point MIN_CONFIG to it.
+TEST_START IF ${RUN} == bisect
+MIN_CONFIG = ${THIS_DIR}/min-config
+CROSS = s390x-linux
+ARCH = s390
+TEST_TYPE = bisect
+BISECT_TYPE = build
+BISECT_GOOD = v3.1
+BISECT_BAD = v3.2
+CHECKOUT = v3.2
+
+#################################
+
+# These defaults are needed to keep ktest.pl from complaining. They are
+# ignored because the test does not go past the build. No install or
+# booting of the target images.
+
+DEFAULTS
+MACHINE = crosstest
+SSH_USER = root
+BUILD_TARGET = cross
+TARGET_IMAGE = image
+POWER_CYCLE = cycle
+CONSOLE = console
+LOCALVERSION = version
+GRUB_MENU = grub
+
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+REBOOT_ON_SUCCESS = 0
+
--- a/tools/testing/ktest/examples/include/bisect.conf
+++ b/tools/testing/ktest/examples/include/bisect.conf
@ -0,0 +1,90 @@
+#
+# This example shows the bisect tests (git bisect and config bisect)
+#
+
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# Set TEST to 'bisect' to do a normal git bisect. You need
+# to modify the options below to make it bisect the exact
+# commits you are interested in.
+#
+TEST_START IF ${TEST} == bisect
+TEST_TYPE = bisect
+# You must set the commit that was considered good (git bisect good)
+BISECT_GOOD = v3.3
+# You must set the commit that was considered bad (git bisect bad)
+BISECT_BAD = HEAD
+# It's best to specify the branch to checkout before starting the bisect.
+CHECKOUT = origin/master
+# This can be build, boot, or test. Here we are doing a bisect
+# that requires to run a test to know if the bisect was good or bad.
+# The test should exit with 0 on good, non-zero for bad. But see
+# the BISECT_RET_* options in sample.conf to override this.
+BISECT_TYPE = test
+TEST = ${RUN_TEST}
+# It is usually a good idea to confirm that the GOOD and the BAD
+# commits are truly good and bad respectively. Having BISECT_CHECK
+# set to 1 will check both that the good commit works and the bad
+# commit fails. If you only want to check one or the other,
+# set BISECT_CHECK to 'good' or to 'bad'.
+BISECT_CHECK = 1
+#BISECT_CHECK = good
+#BISECT_CHECK = bad
+
+# Usually it's a good idea to specify the exact config you
+# want to use throughout the entire bisect. Here we placed
+# it in the directory we called ktest.pl from and named it
+# 'config-bisect'.
+MIN_CONFIG = ${THIS_DIR}/config-bisect
+# By default, if we are doing a BISECT_TYPE = test run but the
+# build or boot fails, ktest.pl will do a 'git bisect skip'.
+# Uncomment the below option to make ktest stop testing on such
+# an error.
+#BISECT_SKIP = 0
+# Now if you had BISECT_SKIP = 0 and the test fails, you can
+# examine what happened and then do 'git bisect log > /tmp/replay'
+# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the
+# 'git bisect replay /tmp/replay' before continuing the bisect test.
+#BISECT_REPLAY = /tmp/replay
+# If you used BISECT_REPLAY after the bisect test failed, you may
+# not want to continue the bisect on that commit that failed.
+# By setting BISECT_START to a new commit. ktest.pl will checkout
+# that commit after it has performed the 'git bisect replay' but
+# before it continues running the bisect test.
+#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9
+
+# Now if you don't trust ktest.pl to make the decisions for you, then
+# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide
+# if the commit was good or bad. Instead, it will ask you to tell
+# it if the current commit was good. In the mean time, you could
+# take the result, load it on any machine you want. Run several tests,
+# or whatever you feel like. Then, when you are happy, you can tell
+# ktest if you think it was good or not and ktest.pl will continue
+# the git bisect. You can even change what commit it is currently at.
+#BISECT_MANUAL = 1
+
+
+# One of the unique tests that ktest does is the config bisect.
+# Currently (which hopefully will be fixed soon), the bad config
+# must be a superset of the good config. This is because it only
+# searches for a config that causes the target to fail. If the
+# good config is not a subset of the bad config, or if the target
+# fails because of a lack of a config, then it will not find
+# the config for you.
+TEST_START IF ${TEST} == config-bisect
+TEST_TYPE = config_bisect
+# set to build, boot, test
+CONFIG_BISECT_TYPE = boot
+# Set the config that is considered bad.
+CONFIG_BISECT = ${THIS_DIR}/config-bad
+# This config is optional. By default it uses the
+# MIN_CONFIG as the good config.
+CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good
--- a/tools/testing/ktest/examples/include/defaults.conf
+++ b/tools/testing/ktest/examples/include/defaults.conf
@ -0,0 +1,157 @@
+# This file holds defaults for most of the tests. It defines the options that
+# are most common to tests that are likely to be shared.
+#
+# Note, after including this file, a config file may override any option
+# with a DEFAULTS OVERRIDE section.
+#
+
+# For those cases that use the same machine to boot a 64 bit
+# and a 32 bit version. The MACHINE is the DNS name to get to the
+# box (usually different if it was 64 bit or 32 bit) but the
+# BOX here is defined as a variable that will be the name of the box
+# itself. It is useful for calling scripts that will power cycle
+# the box, as only one script needs to be created to power cycle
+# even though the box itself has multiple operating systems on it.
+# By default, BOX and MACHINE are the same.
+
+DEFAULTS IF NOT DEFINED BOX
+BOX := ${MACHINE}
+
+
+# Consider each box as 64 bit box, unless the config including this file
+# has defined BITS = 32
+
+DEFAULTS IF NOT DEFINED BITS
+BITS := 64
+
+
+DEFAULTS
+
+# THIS_DIR is used through out the configs and defaults to ${PWD} which
+# is the directory that ktest.pl was called from.
+
+THIS_DIR := ${PWD}
+
+
+# to organize your configs, having each machine save their configs
+# into a separate directory is useful.
+CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE}
+
+# Reset the log before running each test.
+CLEAR_LOG = 1
+
+# As installing kernels usually requires root privilege, default the
+# user on the target as root. It is also required that the target
+# allows ssh to root from the host without asking for a password.
+
+SSH_USER = root
+
+# For accessing the machine, we will ssh to root@machine.
+SSH := ssh ${SSH_USER}@${MACHINE}
+
+# Update this. The default here is ktest will ssh to the target box
+# and run a script called 'run-test' located on that box.
+TEST = ${SSH} run-test
+
+# Point build dir to the git repo you use
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# Each machine will have its own output build directory.
+OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE}
+
+# Yes this config is focused on x86 (but ktest works for other archs too)
+BUILD_TARGET = arch/x86/boot/bzImage
+TARGET_IMAGE = /boot/vmlinuz-test
+
+# have directory for the scripts to reboot and power cycle the boxes
+SCRIPTS_DIR := ${THIS_DIR}/scripts
+
+# You can have each box/machine have a script to power cycle it.
+# Name your script <box>-cycle.
+POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle
+
+# This script is used to power off the box.
+POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff
+
+# Keep your test kernels separate from your other kernels.
+LOCALVERSION = -test
+
+# The /boot/grub/menu.lst is searched for the line:
+#  title Test Kernel
+# and ktest will use that kernel to reboot into.
+# For grub2 or other boot loaders, you need to set BOOT_TYPE
+# to 'script' and define other ways to load the kernel.
+# See snowball.conf example.
+#
+GRUB_MENU = Test Kernel
+
+# The kernel build will use this option.
+BUILD_OPTIONS = -j8
+
+# Keeping the log file with the output dir is convenient.
+LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log
+
+# Each box should have its own minimum configuration
+# See min-config.conf
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+
+# For things like randconfigs, there may be configs you find that
+# are already broken, or there may be some configs that you always
+# want set. Uncomment ADD_CONFIG and point it to the make config files
+# that set the configs you want to keep on (or off) in your build.
+# ADD_CONFIG is usually something to add configs to all machines,
+# where as, MIN_CONFIG is specific per machine.
+#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general
+
+# To speed up reboots for bisects and patchcheck, instead of
+# waiting 60 seconds for the console to be idle, if this line is
+# seen in the console output, ktest will know the good kernel has
+# finished rebooting and it will be able to continue the tests.
+REBOOT_SUCCESS_LINE = ${MACHINE} login:
+
+# The following are different ways to end the test.
+# by setting the variable REBOOT to: none, error, fail or
+# something else, ktest will power cycle or reboot the target box
+# at the end of the tests.
+#
+# REBOOT := none
+#   Don't do anything at the end of the test.
+#
+# REBOOT := error
+#   Reboot the box if ktest detects an error
+#
+# REBOOT := fail
+#   Do not stop on failure, and after all tests are complete
+#   power off the box (for both success and error)
+#   This is good to run over a weekend and you don't want to waste
+#   electricity.
+#
+
+DEFAULTS IF ${REBOOT} == none
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == error
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == fail
+REBOOT_ON_SUCCESS = 0
+POWEROFF_ON_ERROR = 1
+POWEROFF_ON_SUCCESS = 1
+POWEROFF_AFTER_HALT = 120
+DIE_ON_FAILURE = 0
+
+# Store the failure information into this directory
+# such as the .config, dmesg, and build log.
+STORE_FAILURES = ${THIS_DIR}/failures
+
+DEFAULTS ELSE
+REBOOT_ON_SUCCESS = 1
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
--- a/tools/testing/ktest/examples/include/min-config.conf
+++ b/tools/testing/ktest/examples/include/min-config.conf
@ -0,0 +1,60 @@
+#
+# This file has some examples for creating a MIN_CONFIG.
+# (A .config file that is the minimum for a machine to boot, or
+#  to boot and make a network connection.)
+#
+# A MIN_CONFIG is very useful as it is the minimum configuration
+# needed to boot a given machine. You can debug someone else's
+# .config by only setting the configs in your MIN_CONFIG. The closer
+# your MIN_CONFIG is to the true minimum set of configs needed to
+# boot your machine, the closer the config you test with will be
+# to the users config that had the failure.
+#
+# The make_min_config test allows you to create a MIN_CONFIG that
+# is truly the minimum set of configs needed to boot a box.
+#
+# In this example, the final config will reside in
+# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net.
+# Just move one to the location you have set for MIN_CONFIG.
+#
+# The first test creates a MIN_CONFIG that will be the minimum
+# configuration to boot ${MACHINE} and be able to ssh to it.
+#
+# The second test creates a MIN_CONFIG that will only boot
+# the target and most likely will not let you ssh to it. (Notice
+# how the second test uses the first test's result to continue with.
+# This is because the second test config is a subset of the first).
+#
+# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs
+# that ktest.pl found would not boot the target without them set.
+# The config-new-min holds configs that ktest.pl could not test
+# directly because another config that was needed to boot the box
+# selected them. Sometimes it is possible that this file will hold
+# the true minimum configuration. You can test to see if this is
+# the case by running the boot test with BOOT_TYPE = allnoconfig and
+# setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the
+# machine still boots, then you can use the config-skip as your MIN_CONFIG.
+#
+# These tests can run for several hours (and perhaps days).
+# It's OK to kill the test with a Ctrl^C. By restarting without
+# modifying this config, ktest.pl will notice that the config-new-min(-net)
+# exists, and will use that instead as the starting point.
+# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking
+# you if you want to use the OUTPUT_MIN_CONFIG as the starting point.
+# Using the OUTPUT_MIN_CONFIG as the starting point allows ktest.pl to
+# start almost where it left off.
+#
+TEST_START IF ${TEST} == min-config
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net
+MIN_CONFIG_TYPE = test
+TEST = ${SSH} echo hi
+USE_OUTPUT_MIN_CONFIG = 1
+
+TEST_START IF ${TEST} == min-config && ${MULTI}
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip
+MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+USE_OUTPUT_MIN_CONFIG = 1
--- a/tools/testing/ktest/examples/include/patchcheck.conf
+++ b/tools/testing/ktest/examples/include/patchcheck.conf
@ -0,0 +1,111 @@
+# patchcheck.conf
+#
+# This contains a test that takes two git commits and will test each
+# commit between the two. The build test will look at what files the
+# commit has touched, and if any of those files produce a warning, then
+# the build will fail.
+
+
+# PATCH_START is the commit to begin with and PATCH_END is the commit
+# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1
+# and then testing each commit and doing a git rebase --continue.
+# You can use a SHA1, a git tag, or anything that git will accept for a checkout
+
+PATCH_START := HEAD~3
+PATCH_END := HEAD
+
+# Use the oldconfig if build_type wasn't defined
+DEFAULTS IF NOT DEFINED BUILD_TYPE
+DO_BUILD_TYPE := oldconfig
+
+DEFAULTS ELSE
+DO_BUILD_TYPE := ${BUILD_TYPE}
+
+DEFAULTS
+
+
+# Change PATCH_CHECKOUT to be the branch you want to test. The test will
+# do a git checkout of this branch before starting. Obviously both
+# PATCH_START and PATCH_END must be in this branch (and PATCH_START must
+# be contained by PATCH_END).
+
+PATCH_CHECKOUT := test/branch
+
+# Usually it's a good idea to have a set config to use for testing individual
+# patches.
+PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck
+
+# Change PATCH_TEST to run some test for each patch. Each commit that is
+# tested, after it is built and installed on the test machine, this command
+# will be executed. Usually what is done is to ssh to the target box and
+# run some test scripts. If you just want to boot test your patches
+# comment PATCH_TEST out.
+PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script"
+
+DEFAULTS IF DEFINED PATCH_TEST
+PATCH_TEST_TYPE := test
+
+DEFAULTS ELSE
+PATCH_TEST_TYPE := boot
+
+# If for some reason a file has a warning that one of your patches touch
+# but you do not care about it, set IGNORE_WARNINGS to that commit(s)
+# (space delimited)
+#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce
+
+# Instead of just checking for warnings to files that are changed
+# it can be advantageous to check for any new warnings. If a
+# header file is changed, it could cause a warning in a file not
+# touched by the commit. To detect these kinds of warnings, you
+# can use the WARNINGS_FILE option.
+#
+# If the variable CREATE_WARNINGS_FILE is set, this config will
+# enable the WARNINGS_FILE during the patchcheck test. Also,
+# before running the patchcheck test, it will create the
+# warnings file.
+#
+DEFAULTS IF DEFINED CREATE_WARNINGS_FILE
+WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+TEST_START IF DEFINED CREATE_WARNINGS_FILE
+# WARNINGS_FILE is already set by the DEFAULTS above
+TEST_TYPE = make_warnings_file
+# Checkout the commit before the patches to test,
+# and record all the warnings that exist before the patches
+# to test are added
+CHECKOUT = ${PATCHCHECK_START}~1
+# Force a full build
+BUILD_NOCLEAN = 0
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+# If you are running a multi test and the first test failed on, say,
+# the 5th patch, you can restart from that patch by setting
+# PATCH_START1. This will make the first test start
+# from this commit instead of the PATCH_START commit.
+# Note, do not change this option. Just define PATCH_START1 in the
+# top config (the one you pass to ktest.pl), and this will use it,
+# otherwise it will just use PATCH_START if PATCH_START1 is not defined.
+DEFAULTS IF NOT DEFINED PATCH_START1
+PATCH_START1 := ${PATCH_START}
+
+TEST_START IF ${TEST} == patchcheck
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START1}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+TEST_START IF ${TEST} == patchcheck && ${MULTI}
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+# Use multi to test different compilers?
+MAKE_CMD = CC=gcc-4.5.1 make
+BUILD_TYPE = ${DO_BUILD_TYPE}
--- a/tools/testing/ktest/examples/include/tests.conf
+++ b/tools/testing/ktest/examples/include/tests.conf
@ -0,0 +1,74 @@
+#
+# This is an example of various tests that you can run
+#
+# The variable TEST can be one of boot, build, randconfig, or test.
+#
+# Note that TEST is a variable created with ':=' and only exists
+# throughout the config processing (not during the tests itself).
+#
+# The TEST option (defined with '=') is used to tell ktest.pl
+# what test to run after a successful boot. The TEST option is
+# persistent into the test runs.
+#
+
+# The config that includes this file may define a BOOT_TYPE
+# variable that tells this config what type of boot test to run.
+# If it's not defined, the below DEFAULTS will set the default
+# to 'oldconfig'.
+#
+DEFAULTS IF NOT DEFINED BOOT_TYPE
+BOOT_TYPE := oldconfig
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# If TEST is set to 'boot' then just build a kernel and boot
+# the target.
+TEST_START IF ${TEST} == boot
+TEST_TYPE = boot
+# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable.
+BUILD_TYPE = ${BOOT_TYPE}
+# Do not do a make mrproper.
+BUILD_NOCLEAN = 1
+
+# If you only want to build the kernel, and perhaps install
+# and test it yourself, then just set TEST to build.
+TEST_START IF ${TEST} == build
+TEST_TYPE = build
+BUILD_TYPE = ${BOOT_TYPE}
+BUILD_NOCLEAN = 1
+
+# Build, install, boot and test with a randconfig 10 times.
+# It is important that you have set MIN_CONFIG in the config
+# that includes this file otherwise it is likely that the
+# randconfig will not have the necessary configs needed to
+# boot your box. This version of the test requires a min
+# config that has enough to make sure the target has network
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min-net
+TEST_TYPE = test
+BUILD_TYPE = randconfig
+TEST = ${RUN_TEST}
+
+# This is the same as above, but only tests to a boot prompt.
+# The MIN_CONFIG used here does not need to have networking
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI}
+TEST_TYPE = boot
+BUILD_TYPE = randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+MAKE_CMD = make
+
+# This builds, installs, boots and tests the target.
+TEST_START IF ${TEST} == test
+TEST_TYPE = test
+BUILD_TYPE = ${BOOT_TYPE}
+TEST = ${RUN_TEST}
+BUILD_NOCLEAN = 1
--- a/tools/testing/ktest/examples/kvm.conf
+++ b/tools/testing/ktest/examples/kvm.conf
@ -0,0 +1,92 @@
+#
+# This config is an example usage of ktest.pl with a kvm guest
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+
+# Use virsh to read the serial console of the guest
+CONSOLE =  virsh console ${MACHINE}
+
+# Use SIGKILL to terminate virsh console. We can't kill virsh console
+# by the default signal, SIGINT.
+CLOSE_CONSOLE_SIGNAL = KILL
+
+#*************************************#
+# This part is the same as test.conf  #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+#  See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules are complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# The defaults.conf file already defines a POWER_CYCLE option, and
+# options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same). So we use
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previously defined
+# options, for the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
--- a/tools/testing/ktest/examples/snowball.conf
+++ b/tools/testing/ktest/examples/snowball.conf
@ -0,0 +1,53 @@
+# This example was used to boot the snowball ARM board.
+# See http://people.redhat.com/srostedt/ktest-embedded-2012/
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := /home/rostedt/work/demo/ktest-embed
+LOG_FILE = ${OUTPUT_DIR}/snowball.log
+CLEAR_LOG = 1
+MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm
+ADD_CONFIG = ${THIS_DIR}/addconfig
+
+SCP_TO_TARGET = echo "don't do scp"
+
+TFTPBOOT := /var/lib/tftpboot
+TFTPDEF := ${TFTPBOOT}/snowball-default
+TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET}
+
+SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE}
+SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START SKIP
+TEST_TYPE = boot
+BUILD_TYPE = u8500_defconfig
+BUILD_NOCLEAN = 1
+
+TEST_START
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin
+START_MIN_CONFIG = ${THIS_DIR}/config.orig
+IGNORE_CONFIG = ${THIS_DIR}/config.ignore
+BUILD_NOCLEAN = 1
+
+
+DEFAULTS
+LOCALVERSION = -test
+POWER_CYCLE = echo use the thumb luke; read a
+CONSOLE = cat ${THIS_DIR}/snowball-cat
+REBOOT_TYPE = script
+SSH_USER = root
+BUILD_OPTIONS = -j8 uImage
+BUILD_DIR = ${THIS_DIR}/linux.git
+OUTPUT_DIR = ${THIS_DIR}/snowball-build
+MACHINE = snowball
+TARGET_IMAGE = /var/lib/tftpboot/snowball-image
+BUILD_TARGET = arch/arm/boot/uImage
--- a/tools/testing/ktest/examples/test.conf
+++ b/tools/testing/ktest/examples/test.conf
@ -0,0 +1,62 @@
+#
+# Generic config for a machine
+#
+
+# Name your machine (the DNS name, what you ssh to)
+MACHINE = foo
+
+# BOX can be different than foo, if the machine BOX has
+# multiple partitions with different systems installed. For example,
+# you may have a i386 and x86_64 installation on a test box.
+# If this is the case, MACHINE defines the way to connect to the
+# machine, which may be different between which system the machine
+# is booting into. BOX is used for the scripts to reboot and power cycle
+# the machine, where it does not matter which system the machine boots into.
+#
+#BOX := bar
+
+# Define a way to read the console
+CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+#  See include/defaults.conf
+REBOOT := empty
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+# In case you need to add a patch for a bisect or something
+#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch
+
+# Reset the repo after the build and remove all 'test' modules from the target
+# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD
+# is the option (defined by '=')
+
+DO_POST_BUILD := git reset --hard
+POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD}
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
+
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@ -0,0 +1,50 @@
+TARGETS = breakpoints
+TARGETS += cpu-hotplug
+TARGETS += efivarfs
+TARGETS += kcmp
+TARGETS += memfd
+TARGETS += memory-hotplug
+TARGETS += mqueue
+TARGETS += mount
+TARGETS += net
+TARGETS += ptrace
+TARGETS += timers
+TARGETS += vm
+TARGETS += powerpc
+TARGETS += user
+TARGETS += sysctl
+TARGETS += firmware
+TARGETS += ftrace
+
+TARGETS_HOTPLUG = cpu-hotplug
+TARGETS_HOTPLUG += memory-hotplug
+
+# Every rule below walks a target list and recurses into each directory.
+# $(MAKE) is used instead of a literal "make" so that command-line
+# variables and flags (-j, -k, -n, ...) are passed on to the sub-makes.
+# A failing sub-make does not stop the loop.
+all:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET; \
+	done;
+
+run_tests: all
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET run_tests; \
+	done;
+
+hotplug:
+	for TARGET in $(TARGETS_HOTPLUG); do \
+		$(MAKE) -C $$TARGET; \
+	done;
+
+run_hotplug: hotplug
+	for TARGET in $(TARGETS_HOTPLUG); do \
+		$(MAKE) -C $$TARGET run_full_test; \
+	done;
+
+clean_hotplug:
+	for TARGET in $(TARGETS_HOTPLUG); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+
+clean:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+
+# None of these targets produce a file of the same name.
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug clean
--- a/tools/testing/selftests/README.txt
+++ b/tools/testing/selftests/README.txt
@ -0,0 +1,61 @@
+Linux Kernel Selftests
+
+The kernel contains a set of "self tests" under the tools/testing/selftests/
+directory. These are intended to be small unit tests to exercise individual
+code paths in the kernel.
+
+On some systems, hot-plug tests could hang forever waiting for cpu and
+memory to be ready to be offlined. A special hot-plug target is created
+to run the full range of hot-plug tests. In default mode, hot-plug tests run
+in safe mode with a limited scope. In limited mode, cpu-hotplug test is
+run on a single cpu as opposed to all hotplug capable cpus, and memory
+hotplug test is run on 2% of hotplug capable memory instead of 10%.
+
+Running the selftests (hotplug tests are run in limited mode)
+=============================================================
+
+To build the tests:
+
+  $ make -C tools/testing/selftests
+
+
+To run the tests:
+
+  $ make -C tools/testing/selftests run_tests
+
+- note that some tests will require root privileges.
+
+To run only tests targeted for a single subsystem: (including
+hotplug targets in limited mode)
+
+  $  make -C tools/testing/selftests TARGETS=cpu-hotplug run_tests
+
+See the top-level tools/testing/selftests/Makefile for the list of all possible
+targets.
+
+Running the full range hotplug selftests
+========================================
+
+To build the tests:
+
+  $ make -C tools/testing/selftests hotplug
+
+To run the tests:
+
+  $ make -C tools/testing/selftests run_hotplug
+
+- note that some tests will require root privileges.
+
+Contributing new tests
+======================
+
+In general, the rules for selftests are
+
+ * Do as much as you can if you're not root;
+
+ * Don't take too long;
+
+ * Don't break the build on any architecture, and
+
+ * Don't cause the top-level "make run_tests" to fail if your feature is
+   unconfigured.
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@ -0,0 +1,23 @@
+# Taken from perf makefile
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := x86
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := x86
+endif
+
+
+# The test binary is only built on x86 (ARCH is normalised above).
+all:
+ifeq ($(ARCH),x86)
+	gcc breakpoint_test.c -o breakpoint_test
+else
+	echo "Not an x86 target, can't build breakpoints selftests"
+endif
+
+# Only run the binary where "all" builds it; elsewhere report a skip
+# instead of a bogus [FAIL] for a test that was never built.
+run_tests:
+ifeq ($(ARCH),x86)
+	@./breakpoint_test || echo "breakpoints selftests: [FAIL]"
+else
+	@echo "breakpoints selftests: [SKIP] (not an x86 target)"
+endif
+
+clean:
+	rm -fr breakpoint_test
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+/*
+ * Store @addr into debug register slot @n of the traced child by poking
+ * the matching u_debugreg[] entry of its user area.
+ * Exits the test program if ptrace() refuses the write.
+ */
+static void set_breakpoint_addr(void *addr, int n)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[n]), addr);
+	if (ret) {
+		/* perror() appends ": <error>\n" itself, so no '\n' here */
+		perror("Can't set breakpoint addr");
+		exit(-1);
+	}
+}
+
+/*
+ * Enable (@set != 0) or disable (@set == 0) debug register slot @n in
+ * the child's u_debugreg[7] (DR7).
+ *
+ * @type is one of BP_X, BP_W or BP_RW and @len is the access width in
+ * bytes (1, 2, 4 or 8). @local and @global select which enable bits of
+ * the slot are touched. Exits the test program on an unsupported
+ * @type/@len or if DR7 can't be written.
+ */
+static void toggle_breakpoint(int n, int type, int len,
+			      int local, int global, int set)
+{
+	int ret;
+
+	int xtype, xlen;
+	unsigned long vdr7, dr7;
+
+	/* Access-type bits of the slot's control nibble */
+	switch (type) {
+	case BP_X:
+		xtype = 0;
+		break;
+	case BP_W:
+		xtype = 1;
+		break;
+	case BP_RW:
+		xtype = 3;
+		break;
+	default:
+		/* Don't build DR7 from an uninitialized xtype */
+		fprintf(stderr, "Unsupported breakpoint type %d\n", type);
+		exit(-1);
+	}
+
+	/* Length bits, already shifted above the two access-type bits */
+	switch (len) {
+	case 1:
+		xlen = 0;
+		break;
+	case 2:
+		xlen = 4;
+		break;
+	case 4:
+		xlen = 0xc;
+		break;
+	case 8:
+		xlen = 8;
+		break;
+	default:
+		/* Don't build DR7 from an uninitialized xlen */
+		fprintf(stderr, "Unsupported breakpoint length %d\n", len);
+		exit(-1);
+	}
+
+	dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), 0);
+
+	/* The control nibbles start at bit 16, four bits per slot */
+	vdr7 = (xlen | xtype) << 16;
+	vdr7 <<= 4 * n;
+
+	/* Per-slot enable bits sit at 2*n (local) and 2*n+1 (global) */
+	if (local) {
+		vdr7 |= 1 << (2 * n);
+		vdr7 |= 1 << 8;
+	}
+	if (global) {
+		vdr7 |= 2 << (2 * n);
+		vdr7 |= 1 << 9;
+	}
+
+	if (set)
+		dr7 |= vdr7;
+	else
+		dr7 &= ~vdr7;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), dr7);
+	if (ret) {
+		perror("Can't set dr7");
+		exit(-1);
+	}
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/* Dummy functions to test execution accesses */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+	dummy_func,
+	dummy_func1,
+	dummy_func2,
+	dummy_func3,
+};
+
+static int trapped;
+
+/*
+ * Called by the child after each access that was expected to trap.
+ * The parent sets "trapped" through ptrace once it has seen the trap;
+ * if the flag is still clear, raise SIGUSR1 so that the parent gets a
+ * stop it will report as a failure. The flag is cleared and the test
+ * counted either way.
+ */
+static void check_trapped(void)
+{
+	if (trapped == 0)
+		kill(getpid(), SIGUSR1);
+
+	nr_tests += 1;
+	trapped = 0;
+}
+
+/*
+ * Store @len bytes (1, 2, 4 or 8) of all-ones at the start of each
+ * dummy_var[] slot, calling check_trapped() after every slot. Any other
+ * @len performs no store but still calls check_trapped().
+ */
+static void write_var(int len)
+{
+	int slot;
+
+	for (slot = 0; slot < 4; slot++) {
+		void *target = &dummy_var[slot];
+
+		if (len == 1)
+			*(char *)target = 0xff;
+		else if (len == 2)
+			*(short *)target = 0xffff;
+		else if (len == 4)
+			*(int *)target = 0xffffffff;
+		else if (len == 8)
+			*(long long *)target = 0xffffffffffffffffLL;
+
+		check_trapped();
+	}
+}
+
+/*
+ * Load @len bytes (1, 2, 4 or 8) from the start of each dummy_var[]
+ * slot, calling check_trapped() after every slot.
+ *
+ * The loaded value is never used; the loads go through volatile lvalues
+ * so that an optimizing compiler can't drop them.
+ */
+static void read_var(int len)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		switch (len) {
+		case 1:
+			(void)*(volatile char *)&dummy_var[i];
+			break;
+		case 2:
+			(void)*(volatile short *)&dummy_var[i];
+			break;
+		case 4:
+			(void)*(volatile int *)&dummy_var[i];
+			break;
+		case 8:
+			(void)*(volatile long long *)&dummy_var[i];
+			break;
+		}
+		check_trapped();
+	}
+}
+
+/*
+ * Child side of the test: ask to be traced, then perform the
+ * instruction, write and read accesses followed by the icebp and int3
+ * traps. Every access is followed by check_trapped(). Returns early if
+ * PTRACE_TRACEME is refused.
+ */
+static void trigger_tests(void)
+{
+	int len, local, global, i;
+	int ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		/* perror() adds its own newline */
+		perror("Can't be traced?");
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+
+			for (i = 0; i < 4; i++) {
+				dummy_funcs[i]();
+				check_trapped();
+			}
+		}
+	}
+
+	/* Test write watchpoints */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				write_var(len);
+			}
+		}
+	}
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				read_var(len);
+			}
+		}
+	}
+
+	/* Icebp trap */
+	asm(".byte 0xf1\n");
+	check_trapped();
+
+	/* Int 3 trap */
+	asm("int $3\n");
+	check_trapped();
+
+	/* Final stop, resumed by the last PTRACE_CONT of the parent */
+	kill(getpid(), SIGUSR1);
+}
+
+/*
+ * Wait for the child's next stop and print "<msg> [Ok]" or
+ * "<msg> [Failed]".
+ *
+ * The result is "Ok" only if the child stopped on SIGTRAP and its
+ * nr_tests counter matches the parent's. After a SIGTRAP stop the
+ * child's "trapped" flag is set so that its check_trapped() sees the
+ * trap. The parent's nr_tests is incremented in every case.
+ */
+static void check_success(const char *msg)
+{
+	const char *msg2;
+	int child_nr_tests;
+	int status = 0;
+
+	/* Wait for the child to SIGTRAP */
+	if (wait(&status) < 0)
+		perror("Can't wait for the child");
+
+	msg2 = "Failed";
+
+	/* WSTOPSIG() is only meaningful for a stopped child */
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
+					&nr_tests, 0);
+		if (child_nr_tests == nr_tests)
+			msg2 = "Ok";
+		/*
+		 * NOTE(review): PTRACE_POKEDATA stores a whole word, which
+		 * is wider than the int "trapped" on 64-bit - confirm that
+		 * nothing adjacent to it gets clobbered.
+		 */
+		if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) {
+			perror("Can't poke");
+			exit(-1);
+		}
+	}
+
+	nr_tests++;
+
+	printf("%s [%s]\n", msg, msg2);
+}
+
+/*
+ * For each of the four debug register slots: arm an execution
+ * breakpoint on the matching dummy function, resume the child, report
+ * the outcome through check_success() and disarm the slot again.
+ * @buf receives the message describing the current test.
+ */
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+	int slot = 0;
+
+	while (slot < 4) {
+		sprintf(buf, "Test breakpoint %d with local: %d global: %d",
+			slot, local, global);
+
+		set_breakpoint_addr(dummy_funcs[slot], slot);
+		toggle_breakpoint(slot, BP_X, 1, local, global, 1);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		check_success(buf);
+		toggle_breakpoint(slot, BP_X, 1, local, global, 0);
+
+		slot++;
+	}
+}
+
+/*
+ * For each of the four debug register slots: arm a watchpoint of type
+ * @mode and width @len on the matching dummy_var[] entry, resume the
+ * child, report the outcome through check_success() and disarm the slot
+ * again. @buf receives the message describing the current test.
+ */
+static void launch_watchpoints(char *buf, int mode, int len,
+			       int local, int global)
+{
+	/* Anything that is not a write watchpoint is reported as "read" */
+	const char *mode_str = (mode == BP_W) ? "write" : "read";
+	int slot = 0;
+
+	while (slot < 4) {
+		sprintf(buf,
+			"Test %s watchpoint %d with len: %d "
+			"local: %d global: %d",
+			mode_str, slot, len, local, global);
+
+		set_breakpoint_addr(&dummy_var[slot], slot);
+		toggle_breakpoint(slot, mode, len, local, global, 1);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		check_success(buf);
+		toggle_breakpoint(slot, mode, len, local, global, 0);
+
+		slot++;
+	}
+}
+
+/*
+ * Parent side of the test: set the breakpoints and check that the child
+ * successfully triggers them.
+ *
+ * Runs the instruction breakpoints, then write and read-write
+ * watchpoints of every width up to sizeof(long), each with every
+ * local/global combination except "neither", then the icebp and int3
+ * traps. A last PTRACE_CONT resumes the child after its final stop.
+ */
+static void launch_tests(void)
+{
+	char buf[1024];
+	int len, local, global;
+
+	/* Instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+			launch_instruction_breakpoints(buf, local, global);
+		}
+	}
+
+	/* Write watchpoint */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				launch_watchpoints(buf, BP_W, len,
+						   local, global);
+			}
+		}
+	}
+
+	/* Read-Write watchpoint */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				launch_watchpoints(buf, BP_RW, len,
+						   local, global);
+			}
+		}
+	}
+
+	/* Icebp traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test icebp");
+
+	/* Int 3 traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test int 3 trap");
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+/*
+ * Fork a child that traces itself and triggers the accesses, and drive
+ * it from the parent. Returns 1 if the child can't be created, 0
+ * otherwise; individual test results are only printed.
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0) {
+		/* Without a child there is nothing to trace */
+		perror("Can't fork");
+		return 1;
+	}
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	child_pid = pid;
+
+	/* First stop of the child: it is ready for the first test */
+	wait(NULL);
+
+	launch_tests();
+
+	wait(NULL);
+
+	return 0;
+}
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@ -0,0 +1,9 @@
+# Nothing to build: the test is a shell script.
+all:
+
+# Default run; a failing script only prints [FAIL], it does not fail make.
+run_tests:
+	@/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+
+# Same script with -a, which selects its "all hotplug cpus" mode.
+run_full_test:
+	@/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+
+# Nothing to clean up.
+clean:
--- a/tools/testing/selftests/cpu-hotplug/on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/on-off-test.sh
@ -0,0 +1,269 @@
+#!/bin/bash
+
+SYSFS=
+
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit 0
+	fi
+
+	taskset -p 01 $$
+
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit 0
+	fi
+
+	if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
+		echo $msg cpu hotplug is not supported >&2
+		exit 0
+	fi
+
+	echo "CPU online/offline summary:"
+	online_cpus=`cat $SYSFS/devices/system/cpu/online`
+	online_max=${online_cpus##*-}
+	echo -e "\t Cpus in online state: $online_cpus"
+
+	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
+	if [[ "a$offline_cpus" = "a" ]]; then
+		offline_cpus=0
+	else
+		offline_max=${offline_cpus##*-}
+	fi
+	echo -e "\t Cpus in offline state: $offline_cpus"
+}
+
+#
+# list all hot-pluggable CPUs
+#
+hotpluggable_cpus()
+{
+	local state=${1:-.\*}
+
+	for cpu in $SYSFS/devices/system/cpu/cpu*; do
+		if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+			echo ${cpu##/*/cpu}
+		fi
+	done
+}
+
+# List the hot-pluggable cpus whose "online" file contains 0.
+# (The name is spelled "hotplaggable"; the callers use the same spelling.)
+hotplaggable_offline_cpus()
+{
+	hotpluggable_cpus 0
+}
+
+# List the hot-pluggable cpus whose "online" file contains 1.
+hotpluggable_online_cpus()
+{
+	hotpluggable_cpus 1
+}
+
+# Succeed if the "online" file of cpu $1 contains a 1.
+cpu_is_online()
+{
+	grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+# Succeed if the "online" file of cpu $1 contains a 0.
+cpu_is_offline()
+{
+	grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+# Request cpu $1 to go online; the status is that of the write.
+online_cpu()
+{
+	echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+# Request cpu $1 to go offline; the status is that of the write.
+offline_cpu()
+{
+	echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+# Bring cpu $1 online and complain on stderr if the request fails or the
+# cpu does not report itself online afterwards.
+online_cpu_expect_success()
+{
+	local cpu=$1
+
+	if online_cpu $cpu; then
+		if ! cpu_is_online $cpu; then
+			echo $FUNCNAME $cpu: unexpected offline >&2
+		fi
+	else
+		echo $FUNCNAME $cpu: unexpected fail >&2
+	fi
+}
+
+# Try to bring cpu $1 online while a failure is expected: complain on
+# stderr if the request succeeds, or if it fails but the cpu does not
+# report itself offline.
+online_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if ! online_cpu $cpu 2> /dev/null; then
+		if ! cpu_is_offline $cpu; then
+			echo $FUNCNAME $cpu: unexpected online >&2
+		fi
+	else
+		echo $FUNCNAME $cpu: unexpected success >&2
+	fi
+}
+
+# Take cpu $1 offline and complain on stderr if the request fails or the
+# cpu does not report itself offline afterwards.
+offline_cpu_expect_success()
+{
+	local cpu=$1
+
+	if ! offline_cpu $cpu; then
+		echo $FUNCNAME $cpu: unexpected fail >&2
+	elif ! cpu_is_offline $cpu; then
+		# The cpu is still online here, so say so
+		echo $FUNCNAME $cpu: unexpected online >&2
+	fi
+}
+
+# Try to take cpu $1 offline while a failure is expected: complain on
+# stderr if the request succeeds, or if it fails but the cpu does not
+# report itself online.
+offline_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if ! offline_cpu $cpu 2> /dev/null; then
+		if ! cpu_is_online $cpu; then
+			echo $FUNCNAME $cpu: unexpected offline >&2
+		fi
+	else
+		echo $FUNCNAME $cpu: unexpected success >&2
+	fi
+}
+
+# Defaults, overridable from the command line:
+#   error    - value written to the notifier-error-inject "error" files (-e)
+#   allcpus  - 1 selects the full scope test (-a)
+#   priority - passed as priority= to the cpu-notifier-error-inject module (-p)
+# The online_*/offline_* variables are filled in by prerequisite().
+error=-12
+allcpus=0
+priority=0
+online_cpus=0
+online_max=0
+offline_cpus=0
+offline_max=0
+
+while getopts e:ahp: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	a)
+		allcpus=1
+		;;
+	h)
+		echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+		echo -e "\t default offline one cpu"
+		echo -e "\t run with -a option to offline all cpus"
+		exit
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	esac
+done
+
+# Only negative values down to -4095 are accepted as error code
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+prerequisite
+
+#
+# Safe test (default) - offline and online one cpu
+#
+# Without -a only the highest-numbered online cpu is cycled
+# (offline, then online again) and the script exits afterwards.
+if [ $allcpus -eq 0 ]; then
+	echo "Limited scope test: one hotplug cpu"
+	echo -e "\t (leaves cpu in the original state):"
+	echo -e "\t online to offline to online: cpu $online_max"
+	offline_cpu_expect_success $online_max
+	online_cpu_expect_success $online_max
+
+	# NOTE(review): -gt evaluates its operands arithmetically, so a
+	# range such as "2-3" read from the "offline" file evaluates to a
+	# negative number and this part is skipped - confirm whether that
+	# is intended.
+	if [[ $offline_cpus -gt 0 ]]; then
+		echo -e "\t offline to online to offline: cpu $offline_max"
+		online_cpu_expect_success $offline_max
+		offline_cpu_expect_success $offline_max
+	fi
+	exit 0
+else
+	echo "Full scope test: all hotplug cpus"
+	echo -e "\t online all offline cpus"
+	echo -e "\t offline all online cpus"
+	echo -e "\t online all offline cpus"
+fi
+
+#
+# Online all hot-pluggable CPUs
+#
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Offline all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_success $cpu
+done
+
+#
+# Online all hot-pluggable CPUs again
+#
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Test with cpu notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+
+# Prepare the error injection tests: reload the cpu-notifier-error-inject
+# module with the requested priority, then skip the remaining tests
+# (exit status 0) if debugfs is not mounted or the module's debugfs
+# directory is missing.
+prerequisite_extra()
+{
+	msg="skip extra tests:"
+
+	# Unload first so that the priority= parameter takes effect on load
+	/sbin/modprobe -q -r cpu-notifier-error-inject
+	/sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+	if [ ! -d "$DEBUGFS" ]; then
+		echo $msg debugfs is not mounted >&2
+		exit 0
+	fi
+
+	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+		echo $msg cpu-notifier-error-inject module is not available >&2
+		exit 0
+	fi
+}
+
+prerequisite_extra
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_fail $cpu
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+/sbin/modprobe -q -r cpu-notifier-error-inject
--- a/tools/testing/selftests/efivarfs/Makefile
+++ b/tools/testing/selftests/efivarfs/Makefile
@ -0,0 +1,12 @@
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall
+
+# Helper programs run by efivarfs.sh. There is no explicit rule for them
+# here, so they are presumably built from the same-named .c files by
+# make's built-in rules (using CC and CFLAGS above).
+test_objs = open-unlink create-read
+
+all: $(test_objs)
+
+# Build the helpers, then run the script; a failing script only prints
+# [FAIL], it does not fail make.
+run_tests: all
+	@/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
+
+clean:
+	rm -f $(test_objs)
--- a/tools/testing/selftests/efivarfs/create-read.c
+++ b/tools/testing/selftests/efivarfs/create-read.c
@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+/*
+ * Create the file named by argv[1] and check that an immediate read
+ * from it returns 0 (EOF).
+ *
+ * Returns EXIT_SUCCESS in that case, EXIT_FAILURE on a usage error, if
+ * the file can't be opened/created, or if read() returns anything else.
+ */
+int main(int argc, char **argv)
+{
+	const char *path;
+	char buf[4];
+	int fd, rc;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <path>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	path = argv[1];
+
+	/* create a test variable */
+	fd = open(path, O_RDWR | O_CREAT, 0600);
+	if (fd < 0) {
+		/* name the flag that is actually used */
+		perror("open(O_RDWR)");
+		return EXIT_FAILURE;
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (rc != 0) {
+		fprintf(stderr, "Reading a new var should return EOF\n");
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@ -0,0 +1,198 @@
+#!/bin/bash
+
+efivarfs_mount=/sys/firmware/efi/efivars
+test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
+
+check_prereqs()
+{
+	local msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit 0
+	fi
+
+	if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
+		echo $msg efivarfs is not mounted on $efivarfs_mount >&2
+		exit 0
+	fi
+}
+
+# Run test $1, which is either a shell function of this script or an
+# executable in the current directory, print [PASS] or [FAIL] and set
+# the global rc to 1 on failure.
+run_test()
+{
+	local test="$1"
+	local status
+
+	echo "--------------------"
+	echo "running $test"
+	echo "--------------------"
+
+	# A subshell keeps an "exit" inside the test from ending the script
+	if [ "$(type -t $test)" != 'function' ]; then
+		( ./$test )
+		status=$?
+	else
+		( $test )
+		status=$?
+	fi
+
+	if [ $status -eq 0 ]; then
+		echo "  [PASS]"
+	else
+		echo "  [FAIL]"
+		rc=1
+	fi
+}
+
+# Create a variable by writing five bytes (the four bytes of $attrs plus
+# one more zero byte) and check that the file exists and is 5 bytes long.
+# Exits with status 1 on failure.
+test_create()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file couldn't be created" >&2
+		exit 1
+	fi
+
+	if [ $(stat -c %s $file) -ne 5 ]; then
+		echo "$file has invalid size" >&2
+		exit 1
+	fi
+}
+
+# Create a variable file without writing anything to it and check that
+# the file exists afterwards. Exits with status 1 on failure.
+test_create_empty()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	# ":" produces no output, so the redirection only creates the file
+	: > $file
+
+	if [ ! -e $file ]; then
+		echo "$file can not be created without writing" >&2
+		exit 1
+	fi
+}
+
+# Delegate to the create-read helper program; its exit status is the
+# result of the test.
+test_create_read()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+	./create-read $file
+}
+
+# Create a variable, remove it with rm and check that the file is gone.
+# Exits with status 1 if either the creation or the removal fails.
+test_delete()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file couldn't be created" >&2
+		exit 1
+	fi
+
+	rm $file
+
+	if [ -e $file ]; then
+		echo "$file couldn't be deleted" >&2
+		exit 1
+	fi
+
+}
+
+# Test that we can remove a variable by issuing a write with only
+# attributes specified: create the variable with $attrs plus one more
+# byte, then write $attrs alone and expect the file to disappear.
+# Exits with status 1 on failure.
+test_zero_size_delete()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file does not exist" >&2
+		exit 1
+	fi
+
+	printf "$attrs" > $file
+
+	if [ -e $file ]; then
+		echo "$file should have been deleted" >&2
+		exit 1
+	fi
+}
+
+# Delegate to the open-unlink helper program; its exit status is the
+# result of the test.
+test_open_unlink()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+	./open-unlink $file
+}
+
+# Test that we can create a range of filenames: each name in file_list,
+# suffixed with -$test_guid, must be creatable. Files that were created
+# are removed again. Exits with status 1 if any name failed, 0 otherwise.
+test_valid_filenames()
+{
+	local attrs='\x07\x00\x00\x00'
+	local ret=0
+
+	local file_list="abc dump-type0-11-1-1362436005 1234 -"
+	for f in $file_list; do
+		local file=$efivarfs_mount/$f-$test_guid
+
+		printf "$attrs\x00" > $file
+
+		if [ ! -e $file ]; then
+			echo "$file could not be created" >&2
+			ret=1
+		else
+			rm $file
+		fi
+	done
+
+	exit $ret
+}
+
+# Test that a range of filenames is rejected: creating any name in
+# file_list (used as is, without appending $test_guid) must fail. A file
+# that does get created is reported and removed. Exits with status 1 if
+# any name was accepted, 0 otherwise.
+test_invalid_filenames()
+{
+	local attrs='\x07\x00\x00\x00'
+	local ret=0
+
+	local file_list="
+		-1234-1234-1234-123456789abc
+		foo
+		foo-bar
+		-foo-
+		foo-barbazba-foob-foob-foob-foobarbazfoo
+		foo-------------------------------------
+		-12345678-1234-1234-1234-123456789abc
+		a-12345678=1234-1234-1234-123456789abc
+		a-12345678-1234=1234-1234-123456789abc
+		a-12345678-1234-1234=1234-123456789abc
+		a-12345678-1234-1234-1234=123456789abc
+		1112345678-1234-1234-1234-123456789abc"
+
+	for f in $file_list; do
+		local file=$efivarfs_mount/$f
+
+		# stderr is redirected first, presumably to hide the expected
+		# error of the failing "> $file" redirection
+		printf "$attrs\x00" 2>/dev/null > $file
+
+		if [ -e $file ]; then
+			echo "Creating $file should have failed" >&2
+			rm $file
+			ret=1
+		fi
+	done
+
+	exit $ret
+}
+
+check_prereqs
+
+rc=0
+
+run_test test_create
+run_test test_create_empty
+run_test test_create_read
+run_test test_delete
+run_test test_zero_size_delete
+run_test test_open_unlink
+run_test test_valid_filenames
+run_test test_invalid_filenames
+
+exit $rc
--- a/tools/testing/selftests/efivarfs/open-unlink.c
+++ b/tools/testing/selftests/efivarfs/open-unlink.c
@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/*
+ * Create an EFI variable at the path given in argv[1], then check that it
+ * can no longer be read through a descriptor that was opened before the
+ * variable was unlinked.
+ *
+ * Returns EXIT_SUCCESS when the read after unlink yields no data, and
+ * EXIT_FAILURE on a usage error, on any failing system call, or when data
+ * could still be read.
+ */
+int main(int argc, char **argv)
+{
+	/* attributes: EFI_VARIABLE_NON_VOLATILE |
+	 *		EFI_VARIABLE_BOOTSERVICE_ACCESS |
+	 *		EFI_VARIABLE_RUNTIME_ACCESS
+	 * i.e. 0x7 spelled out as a little-endian 32-bit value, followed by a
+	 * single zero data byte. Writing the bytes directly avoids storing a
+	 * uint32_t through a pointer into a char array.
+	 */
+	char buf[5] = { 0x07, 0x00, 0x00, 0x00, 0x00 };
+	const char *path;
+	int fd, rc;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <path>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	path = argv[1];
+
+	/* create a test variable; O_CREAT requires an explicit mode argument */
+	fd = open(path, O_WRONLY | O_CREAT, 0600);
+	if (fd < 0) {
+		perror("open(O_WRONLY)");
+		return EXIT_FAILURE;
+	}
+
+	rc = write(fd, buf, sizeof(buf));
+	if (rc != sizeof(buf)) {
+		perror("write");
+		return EXIT_FAILURE;
+	}
+
+	close(fd);
+
+	/* keep a read descriptor open across the unlink below */
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return EXIT_FAILURE;
+	}
+
+	if (unlink(path) < 0) {
+		perror("unlink");
+		return EXIT_FAILURE;
+	}
+
+	/* any data returned here means the variable outlived its unlink */
+	rc = read(fd, buf, sizeof(buf));
+	if (rc > 0) {
+		fprintf(stderr, "reading from an unlinked variable "
+				"shouldn't be possible\n");
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@ -0,0 +1,27 @@
+# Makefile for firmware loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+fw_filesystem:
+	@if /bin/sh ./fw_filesystem.sh ; then \
+                echo "fw_filesystem: ok"; \
+        else \
+                echo "fw_filesystem: [FAIL]"; \
+                exit 1; \
+        fi
+
+fw_userhelper:
+	@if /bin/sh ./fw_userhelper.sh ; then \
+                echo "fw_userhelper: ok"; \
+        else \
+                echo "fw_userhelper: [FAIL]"; \
+                exit 1; \
+        fi
+
+run_tests: all fw_filesystem fw_userhelper
+
+# Nothing to clean up.
+clean:
+
+.PHONY: all clean run_tests fw_filesystem fw_userhelper
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@ -0,0 +1,62 @@
+#!/bin/sh
+# This validates that the kernel will load firmware out of its list of
+# firmware locations on disk. Since the user helper does similar work,
+# we reset the custom load directory to a location the user helper doesn't
+# know so we can be sure we're not accidentally testing the user helper.
+set -e
+
+modprobe test_firmware
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path)
+
+FWPATH=$(mktemp -d)
+FW="$FWPATH/test-firmware.bin"
+
+# Restore the firmware timeout and the kernel firmware search path saved at
+# startup, then remove the temporary firmware file and its directory.
+# Installed as the EXIT trap below.
+test_finish()
+{
+	echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+	# The original search path was saved in OLD_FWPATH.
+	echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+	rm -f "$FW"
+	rmdir "$FWPATH"
+}
+
+trap "test_finish" EXIT
+
+# Turn down the timeout so failures don't take so long.
+echo 1 >/sys/class/firmware/timeout
+# Set the kernel search path.
+echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
+
+# This is an unlikely real-world firmware content. :)
+echo "ABCD0123" >"$FW"
+
+NAME=$(basename "$FW")
+
+# Request a firmware that doesn't exist, it should fail.
+echo -n "nope-$NAME" >"$DIR"/trigger_request
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	echo "$0: timeout works"
+fi
+
+# This should succeed via kernel load or will fail after 1 second after
+# being handed over to the user helper, which won't find the fw either.
+if ! echo -n "$NAME" >"$DIR"/trigger_request ; then
+	echo "$0: could not trigger request" >&2
+	exit 1
+fi
+
+# Verify the contents are what we expect.
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not loaded" >&2
+	exit 1
+else
+	echo "$0: filesystem loading works"
+fi
+
+exit 0
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ b/tools/testing/selftests/firmware/fw_userhelper.sh
@ -0,0 +1,89 @@
+#!/bin/sh
+# This validates that the kernel will fall back to using the user helper
+# to load firmware it can't find on disk itself. We must request a firmware
+# that the kernel won't find, and any installed helper (e.g. udev) also
+# won't find so that we can do the load ourself manually.
+set -e
+
+modprobe test_firmware
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+
+FWPATH=$(mktemp -d)
+FW="$FWPATH/test-firmware.bin"
+
+test_finish()
+{
+	echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+	rm -f "$FW"
+	rmdir "$FWPATH"
+}
+
+# Trigger a firmware request for name "$1" and answer it with the contents
+# of file "$2" through the loading/data files under "$DIR"/"$1".
+# Exits the script with status 1 if the loading file has not appeared after
+# ten polls of 0.1 seconds each.
+load_fw()
+{
+	local name="$1"
+	local file="$2"
+
+	# This will block until our load (below) has finished.
+	echo -n "$name" >"$DIR"/trigger_request &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	# Write 1 to "loading", copy the payload into "data", then write 0.
+	echo 1 >"$DIR"/"$name"/loading
+	cat "$file" >"$DIR"/"$name"/data
+	echo 0 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+trap "test_finish" EXIT
+
+# This is an unlikely real-world firmware content. :)
+echo "ABCD0123" >"$FW"
+NAME=$(basename "$FW")
+
+# Test failure when doing nothing (timeout works).
+echo 1 >/sys/class/firmware/timeout
+echo -n "$NAME" >"$DIR"/trigger_request
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	echo "$0: timeout works"
+fi
+
+# Put timeout high enough for us to do work but not so long that failures
+# slow down this test too much.
+echo 4 >/sys/class/firmware/timeout
+
+# Load this script instead of the desired firmware.
+load_fw "$NAME" "$0"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	echo "$0: firmware comparison works"
+fi
+
+# Do a proper load, which should work correctly.
+load_fw "$NAME" "$FW"
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not loaded" >&2
+	exit 1
+else
+	echo "$0: user helper firmware loading works"
+fi
+
+exit 0
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@ -0,0 +1,7 @@
+all:
+
+run_tests:
+	@/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
+
+clean:
+	rm -rf logs/*
--- a/tools/testing/selftests/ftrace/README
+++ b/tools/testing/selftests/ftrace/README
@ -0,0 +1,82 @@
+Linux Ftrace Testcases
+
+This is a collection of testcases for ftrace tracing feature in the Linux
+kernel. Since ftrace exports interfaces via the debugfs, we just need
+shell scripts for testing. Feel free to add new test cases.
+
+Running the ftrace testcases
+============================
+
+At first, you need to be the root user to run this script.
+To run all testcases:
+
+  $ sudo ./ftracetest
+
+To run specific testcases:
+
+  # ./ftracetest test.d/basic3.tc
+
+Or you can also run testcases under given directory:
+
+  # ./ftracetest test.d/kprobe/
+
+Contributing new testcases
+==========================
+
+Copy test.d/template to your testcase (whose filename must have *.tc
+extension) and rewrite the test description line.
+
+ * The working directory of the script is <debugfs>/tracing/.
+
+ * Take care with side effects as the tests are run with root privilege.
+
+ * The tests should not run for a long period of time (more than 1 min.)
+   These are to be unit tests.
+
+ * You can add a directory for your testcases under test.d/ if needed.
+
+ * The test cases should run on dash (busybox shell) for testing on
+   minimal cross-build environments.
+
+ * Note that the tests are run with "set -e" (errexit) option. If any
+   command fails, the test will be terminated immediately.
+
+ * The tests can return some result codes instead of pass or fail by
+   using exit_unresolved, exit_untested, exit_unsupported and exit_xfail.
+
+Result code
+===========
+
+Ftracetest supports following result codes.
+
+ * PASS: The test succeeded as expected. The test which exits with 0 is
+         counted as passed test.
+
+ * FAIL: The test failed, but was expected to succeed. The test which exits
+         with !0 is counted as failed test.
+
+ * UNRESOLVED: The test produced unclear or intermediate results,
+             for example:
+                       the test was interrupted,
+                       or the test depends on a previous test, which failed,
+                       or the test was set up incorrectly.
+             A test in any of the above situations must call exit_unresolved.
+
+ * UNTESTED: The test was not run, currently just a placeholder.
+             In this case, the test must call exit_untested.
+
+ * UNSUPPORTED: The test failed because of lack of feature.
+               In this case, the test must call exit_unsupported.
+
+ * XFAIL: The test failed, and was expected to fail.
+          To return XFAIL, call exit_xfail from the test.
+
+There are some sample test scripts for result code under samples/.
+You can also run samples as below:
+
+  # ./ftracetest samples/
+
+TODO
+====
+
+ * Fancy colored output :)
+
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@ -0,0 +1,253 @@
+#!/bin/sh
+
+# ftracetest - Ftrace test shell scripts
+#
+# Copyright (C) Hitachi Ltd., 2014
+#  Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+usage() { # errno [message]
+[ "$2" ] && echo $2
+echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
+echo " Options:"
+echo "		-h|--help  Show help message"
+echo "		-k|--keep  Keep passed test logs"
+echo "		-d|--debug Debug mode (trace all shell commands)"
+exit $1
+}
+
+errexit() { # message
+  # Report the message on stderr and terminate with status 1.
+  echo "Error: $1" >&2
+  exit 1
+}
+
+# Ensuring user privilege
+if [ `id -u` -ne 0 ]; then
+  errexit "this must be run by root user"
+fi
+
+# Utilities
+absdir() { # file_path
+  # Print the absolute path of the directory containing $1; the cd happens
+  # in a subshell so the caller's working directory is untouched.
+  (cd $(dirname $1); pwd)
+}
+
+abspath() { # file_path
+  # Print $1 as an absolute path: absolute directory plus base name.
+  echo $(absdir $1)/$(basename $1)
+}
+
+find_testcases() { #directory
+  # Print every *.tc file found below $1 on a single, space-separated line.
+  echo $(find $1 -name '*.tc')
+}
+
+parse_opts() { # opts
+  # Handle option flags and collect the testcases named on the command line;
+  # if any were given they replace the default TEST_CASES list.
+  local OPT_TEST_CASES=
+  local OPT_TEST_DIR=
+
+  while [ "$1" ]; do
+    case "$1" in
+    -h|--help)
+      usage 0
+    ;;
+    -k|--keep)
+      KEEP_LOG=1
+    ;;
+    -d|--debug)
+      DEBUG=1
+    ;;
+    *.tc)
+      # usage exits, so execution only continues for an existing file
+      [ -f "$1" ] || usage 1 "$1 is not a testcase"
+      OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
+    ;;
+    *)
+      # anything else has to be a directory holding testcases
+      [ -d "$1" ] || usage 1 "Invalid option ($1)"
+      OPT_TEST_DIR=`abspath $1`
+      OPT_TEST_CASES="$OPT_TEST_CASES `find_testcases $OPT_TEST_DIR`"
+    ;;
+    esac
+    shift 1
+  done
+  if [ "$OPT_TEST_CASES" ]; then
+    TEST_CASES=$OPT_TEST_CASES
+  fi
+}
+
+# Parameters
+DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
+TRACING_DIR=$DEBUGFS_DIR/tracing
+TOP_DIR=`absdir $0`
+TEST_DIR=$TOP_DIR/test.d
+TEST_CASES=`find_testcases $TEST_DIR`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
+KEEP_LOG=0
+DEBUG=0
+# Parse command-line options
+parse_opts $*
+
+[ $DEBUG -ne 0 ] && set -x
+
+# Verify parameters
+if [ -z "$DEBUGFS_DIR" -o ! -d "$TRACING_DIR" ]; then
+  errexit "No ftrace directory found"
+fi
+
+# Preparing logs
+LOG_FILE=$LOG_DIR/ftracetest.log
+mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+date > $LOG_FILE
+prlog() { # messages
+  echo "$@" | tee -a $LOG_FILE
+}
+catlog() { #file
+  cat $1 | tee -a $LOG_FILE
+}
+prlog "=== Ftrace unit tests ==="
+
+
+# Testcase management
+# Test result codes - Dejagnu extended code
+PASS=0	# The test succeeded.
+FAIL=1	# The test failed, but was expected to succeed.
+UNRESOLVED=2  # The test produced indeterminate results. (e.g. interrupted)
+UNTESTED=3    # The test was not run, currently just a placeholder.
+UNSUPPORTED=4 # The test failed because of lack of feature.
+XFAIL=5	# The test failed, and was expected to fail.
+
+# Accumulations
+PASSED_CASES=
+FAILED_CASES=
+UNRESOLVED_CASES=
+UNTESTED_CASES=
+UNSUPPORTED_CASES=
+XFAILED_CASES=
+UNDEFINED_CASES=
+TOTAL_RESULT=0
+
+CASENO=0
+# Increment the running case number and log "[N]" followed by the second
+# ':'-separated field of the file's "# description:" line, passing -n to prlog.
+testcase() { # testfile
+  CASENO=$((CASENO+1))
+  prlog -n "[$CASENO]"`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
+}
+
+# Turn a test's outcome into a result code, log it and record the case number
+# in the matching *_CASES list.
+#   $1: exit status of the test
+#   $2: result code reported via signal (0 when none was reported)
+# Returns 0 for PASS, UNTESTED and XFAIL, and 1 for every other result.
+eval_result() { # retval sigval
+  local retval=$2
+  # Without a signalled result, a non-zero exit status counts as FAIL;
+  # otherwise retval stays 0, which is PASS.
+  if [ $2 -eq 0 ]; then
+    test $1 -ne 0 && retval=$FAIL
+  fi
+  case $retval in
+    $PASS)
+      prlog "	[PASS]"
+      PASSED_CASES="$PASSED_CASES $CASENO"
+      return 0
+    ;;
+    $FAIL)
+      prlog "	[FAIL]"
+      FAILED_CASES="$FAILED_CASES $CASENO"
+      return 1 # this is a bug.
+    ;;
+    $UNRESOLVED)
+      prlog "	[UNRESOLVED]"
+      UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
+      return 1 # this is a kind of bug.. something happened.
+    ;;
+    $UNTESTED)
+      prlog "	[UNTESTED]"
+      UNTESTED_CASES="$UNTESTED_CASES $CASENO"
+      return 0
+    ;;
+    $UNSUPPORTED)
+      prlog "	[UNSUPPORTED]"
+      UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
+      return 1 # this is not a bug, but the result should be reported.
+    ;;
+    $XFAIL)
+      prlog "	[XFAIL]"
+      XFAILED_CASES="$XFAILED_CASES $CASENO"
+      return 0
+    ;;
+    *)
+      # any code outside the list above
+      prlog "	[UNDEFINED]"
+      UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
+      return 1 # this must be a test bug
+    ;;
+  esac
+}
+
+# Signal handling for result codes
+SIG_RESULT=
+SIG_BASE=36	# Use realtime signals
+SIG_PID=$$
+
+SIG_UNRESOLVED=$((SIG_BASE + UNRESOLVED))
+exit_unresolved () {
+  kill -s $SIG_UNRESOLVED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNRESOLVED' $SIG_UNRESOLVED
+
+SIG_UNTESTED=$((SIG_BASE + UNTESTED))
+exit_untested () {
+  kill -s $SIG_UNTESTED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNTESTED' $SIG_UNTESTED
+
+SIG_UNSUPPORTED=$((SIG_BASE + UNSUPPORTED))
+exit_unsupported () {
+  kill -s $SIG_UNSUPPORTED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNSUPPORTED' $SIG_UNSUPPORTED
+
+SIG_XFAIL=$((SIG_BASE + XFAIL))
+exit_xfail () {
+  kill -s $SIG_XFAIL $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
+
+# Run one test case
+# Sources the testcase in a subshell whose working directory is $TRACING_DIR,
+# with errexit and xtrace enabled, appending its output to a per-test log.
+# The log is removed when eval_result returns 0 and KEEP_LOG is 0; when
+# eval_result returns non-zero the log is shown via catlog and TOTAL_RESULT
+# is set to 1.
+run_test() { # testfile
+  local testname=`basename $1`
+  local testlog=`mktemp --tmpdir=$LOG_DIR ${testname}-XXXXXX.log`
+  testcase $1
+  echo "execute: "$1 > $testlog
+  # cleared here; the exit_* signal traps overwrite it with a result code
+  SIG_RESULT=0
+  # setup PID and PPID, $$ is not updated.
+  (cd $TRACING_DIR; read PID _ < /proc/self/stat ;
+   set -e; set -x; . $1) >> $testlog 2>&1
+  # $? here is still the subshell's exit status
+  eval_result $? $SIG_RESULT
+  if [ $? -eq 0 ]; then
+    # Remove test log if the test was done as it was expected.
+    [ $KEEP_LOG -eq 0 ] && rm $testlog
+  else
+    catlog $testlog
+    TOTAL_RESULT=1
+  fi
+}
+
+# Main loop
+for t in $TEST_CASES; do
+  run_test $t
+done
+
+prlog ""
+prlog "# of passed: " `echo $PASSED_CASES | wc -w`
+prlog "# of failed: " `echo $FAILED_CASES | wc -w`
+prlog "# of unresolved: " `echo $UNRESOLVED_CASES | wc -w`
+prlog "# of untested: " `echo $UNTESTED_CASES | wc -w`
+prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
+prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
+prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+
+# if no error, return 0
+exit $TOTAL_RESULT
--- a/tools/testing/selftests/ftrace/samples/fail.tc
+++ b/tools/testing/selftests/ftrace/samples/fail.tc
@ -0,0 +1,4 @@
+#!/bin/sh
+# description: failure-case example
+cat non-exist-file
+echo "this is not executed"
--- a/tools/testing/selftests/ftrace/samples/pass.tc
+++ b/tools/testing/selftests/ftrace/samples/pass.tc
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: pass-case example
+return 0
--- a/tools/testing/selftests/ftrace/samples/unresolved.tc
+++ b/tools/testing/selftests/ftrace/samples/unresolved.tc
@ -0,0 +1,4 @@
+#!/bin/sh
+# description: unresolved-case example
+trap exit_unresolved INT
+kill -INT $PID
--- a/tools/testing/selftests/ftrace/samples/unsupported.tc
+++ b/tools/testing/selftests/ftrace/samples/unsupported.tc
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: unsupported-case example
+exit_unsupported
--- a/tools/testing/selftests/ftrace/samples/untested.tc
+++ b/tools/testing/selftests/ftrace/samples/untested.tc
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: untested-case example
+exit_untested
--- a/tools/testing/selftests/ftrace/samples/xfail.tc
+++ b/tools/testing/selftests/ftrace/samples/xfail.tc
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: xfail-case example
+cat non-exist-file || exit_xfail
--- a/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: Basic trace file check
+test -f README -a -f trace -a -f tracing_on -a -f trace_pipe
--- a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
@ -0,0 +1,7 @@
+#!/bin/sh
+# description: Basic test for tracers
+test -f available_tracers
+for t in `cat available_tracers`; do
+  echo $t > current_tracer
+done
+echo nop > current_tracer
--- a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
@ -0,0 +1,8 @@
+#!/bin/sh
+# description: Basic trace clock test
+test -f trace_clock
+for c in `cat trace_clock | tr  -d \[\]`; do
+  echo $c > trace_clock
+  grep '\['$c'\]' trace_clock
+done
+echo local > trace_clock
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@ -0,0 +1,11 @@
+#!/bin/sh
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent do_fork > kprobe_events
+grep myevent kprobe_events
+test -d events/kprobes/myevent
+echo > kprobe_events
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@ -0,0 +1,13 @@
+#!/bin/sh
+# description: Kprobe dynamic event - busy event check
+
+[ -f kprobe_events ] || exit_unsupported
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent do_fork > kprobe_events
+test -d events/kprobes/myevent
+echo 1 > events/kprobes/myevent/enable
+echo > kprobe_events && exit 1 # this must fail
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events # this must succeed
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@ -0,0 +1,16 @@
+#!/bin/sh
+# description: Kprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe do_fork $stack $stack0 +0($stack)' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+echo "-:testprobe" >> kprobe_events
+test -d events/kprobes/testprobe && exit 1 || exit 0
+
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@ -0,0 +1,15 @@
+#!/bin/sh
+# description: Kretprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'r:testprobe2 do_fork $retval' > kprobe_events
+grep testprobe2 kprobe_events
+test -d events/kprobes/testprobe2
+echo 1 > events/kprobes/testprobe2/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe2/enable
+echo '-:testprobe2' >> kprobe_events
+test -d events/kprobes/testprobe2 && exit 1 || exit 0
--- a/tools/testing/selftests/ftrace/test.d/template
+++ b/tools/testing/selftests/ftrace/test.d/template
@ -0,0 +1,9 @@
+#!/bin/sh
+# description: %HERE DESCRIBE WHAT THIS DOES%
+# you have to add the ".tc" extension to your testcase file name
+# Note that all tests are run with "errexit" option.
+
+exit 0 # Return 0 if the test is passed, otherwise return !0
+# If the test could not run because of lack of feature, call exit_unsupported
+# If the test returned unclear results, call exit_unresolved
+# If the test is a dummy, or a placeholder, call exit_untested
--- a/tools/testing/selftests/ipc/Makefile
+++ b/tools/testing/selftests/ipc/Makefile
@ -0,0 +1,25 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := x86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := x86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+all:
+ifeq ($(ARCH),x86)
+	gcc $(CFLAGS) msgque.c -o msgque_test
+else
+	echo "Not an x86 target, can't build msgque selftest"
+endif
+
+run_tests: all
+	./msgque_test
+
+clean:
+	rm -fr ./msgque_test
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@ -0,0 +1,252 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/msg.h>
+#include <fcntl.h>
+
+#define MAX_MSG_SIZE		32
+
+/*
+ * One queue message as kept by this test. The msgsnd()/msgrcv() calls in
+ * this file are handed a pointer to mtype, so mtype and mtext together form
+ * the buffer passed to the kernel; msize holds the payload length.
+ */
+struct msg1 {
+	int msize;
+	long mtype;
+	char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+/*
+ * State of the queue under test: the key and id it was created with, the
+ * values copied from its msqid64_ds by dump_queue() (qbytes, qnum, mode),
+ * and the array of qnum messages saved from it.
+ */
+struct msgque_data {
+	key_t key;
+	int msq_id;
+	int qbytes;
+	int qnum;
+	int mode;
+	struct msg1 *messages;
+};
+
+/*
+ * Recreate the queue described by @msgque under its previous id and refill it.
+ *
+ * Writes msgque->msq_id to /proc/sys/kernel/msg_next_id, creates the queue
+ * with msgget() and re-sends the msgque->qnum saved messages. If the new
+ * queue gets a different id or a send fails, the queue is removed again.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+int restore_queue(struct msgque_data *msgque)
+{
+	int fd, ret, id, i, len;
+	ssize_t written;
+	char buf[32];
+
+	fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+	if (fd == -1) {
+		/* save errno before printf() gets a chance to change it */
+		ret = -errno;
+		printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+		return ret;
+	}
+	len = snprintf(buf, sizeof(buf), "%d", msgque->msq_id);
+
+	written = write(fd, buf, len);
+	ret = (written < 0) ? -errno : 0;
+	/* the descriptor is only needed for this one write */
+	close(fd);
+	if (written != (ssize_t)len) {
+		printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+		/* a short write leaves errno unset, so report -EIO for it */
+		return ret ? ret : -EIO;
+	}
+
+	id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+	if (id == -1) {
+		ret = -errno;
+		printf("Failed to create queue\n");
+		return ret;
+	}
+
+	if (id != msgque->msq_id) {
+		printf("Restored queue has wrong id (%d instead of %d)\n",
+							id, msgque->msq_id);
+		ret = -EFAULT;
+		goto destroy;
+	}
+
+	for (i = 0; i < msgque->qnum; i++) {
+		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+			   msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+			ret = -errno;
+			printf("msgsnd failed (%m)\n");
+			goto destroy;
+		}
+	}
+	return 0;
+
+destroy:
+	if (msgctl(id, IPC_RMID, 0))
+		printf("Failed to destroy queue: %d\n", -errno);
+	return ret;
+}
+
+/*
+ * Drain the queue and compare every received message (size, type, content)
+ * and the total message count with what is saved in @msgque, then remove
+ * the queue whether or not the comparison succeeded.
+ *
+ * Returns 0 when everything matches, a negative errno value when a receive
+ * or the removal fails, and -EINVAL on any mismatch.
+ */
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+	struct msg1 message;
+	int cnt = 0, ret, rm_err;
+
+	while (1) {
+		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+				0, IPC_NOWAIT);
+		if (ret < 0) {
+			/* ENOMSG: the queue is empty, stop reading */
+			if (errno == ENOMSG)
+				break;
+			ret = -errno;
+			printf("Failed to read IPC message: %m\n");
+			goto err;
+		}
+		/* more messages than were saved: do not index past the array */
+		if (cnt >= msgque->qnum) {
+			printf("Wrong message number\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (ret != msgque->messages[cnt].msize) {
+			printf("Wrong message size: %d (expected %d)\n", ret,
+						msgque->messages[cnt].msize);
+			ret = -EINVAL;
+			goto err;
+		}
+		if (message.mtype != msgque->messages[cnt].mtype) {
+			printf("Wrong message type\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+			printf("Wrong message content\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		cnt++;
+	}
+
+	if (cnt != msgque->qnum) {
+		printf("Wrong message number\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	if (msgctl(msgque->msq_id, IPC_RMID, 0)) {
+		/* save errno before printf() gets a chance to change it */
+		rm_err = -errno;
+		printf("Failed to destroy queue: %d\n", rm_err);
+		return rm_err;
+	}
+	return ret;
+}
+
+/*
+ * Locate the queue with id msgque->msq_id by probing kernel indexes
+ * 0..255 with MSG_STAT, record its qnum/mode/qbytes in @msgque and copy
+ * every queued message into a newly allocated msgque->messages array
+ * using MSG_COPY.
+ *
+ * Returns 0 on success, -ENOENT when no probed index yields the queue,
+ * -ENOMEM when the array cannot be allocated, or a negative errno value
+ * from a failing msgctl()/msgrcv().
+ */
+int dump_queue(struct msgque_data *msgque)
+{
+	const int max_kern_id = 256;
+	struct msqid64_ds ds;
+	int kern_id;
+	int i, ret;
+
+	for (kern_id = 0; kern_id < max_kern_id; kern_id++) {
+		ret = msgctl(kern_id, MSG_STAT, &ds);
+		if (ret < 0) {
+			/* errno holds positive codes; EINVAL: skip this index */
+			if (errno == EINVAL)
+				continue;
+			ret = -errno;
+			printf("Failed to get stats for IPC queue with id %d\n",
+					kern_id);
+			return ret;
+		}
+
+		if (ret == msgque->msq_id)
+			break;
+	}
+
+	/* loop ran to completion: ds does not describe our queue */
+	if (kern_id == max_kern_id) {
+		printf("Failed to find IPC queue with id %d\n",
+				msgque->msq_id);
+		return -ENOENT;
+	}
+
+	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+	if (msgque->messages == NULL) {
+		printf("Failed to allocate memory for IPC messages\n");
+		return -ENOMEM;
+	}
+
+	msgque->qnum = ds.msg_qnum;
+	msgque->mode = ds.msg_perm.mode;
+	msgque->qbytes = ds.msg_qbytes;
+
+	for (i = 0; i < msgque->qnum; i++) {
+		/* i is passed in the msgtyp position together with MSG_COPY */
+		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+				MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+		if (ret < 0) {
+			ret = -errno;
+			printf("Failed to copy IPC message: %m (%d)\n", errno);
+			return ret;
+		}
+		msgque->messages[i].msize = ret;
+	}
+	return 0;
+}
+
+/*
+ * Queue the two fixed test messages (TEST_STRING with MSG_TYPE, then
+ * ANOTHER_TEST_STRING with ANOTHER_MSG_TYPE) on msgque->msq_id.
+ * Returns 0 on success or -errno when a send fails.
+ */
+int fill_msgque(struct msgque_data *msgque)
+{
+	struct msg1 out;
+	const int qid = msgque->msq_id;
+
+	/* first message */
+	memcpy(out.mtext, TEST_STRING, sizeof(TEST_STRING));
+	out.mtype = MSG_TYPE;
+	if (msgsnd(qid, &out.mtype, sizeof(TEST_STRING), IPC_NOWAIT)) {
+		printf("First message send failed (%m)\n");
+		return -errno;
+	}
+
+	/* second message */
+	memcpy(out.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+	out.mtype = ANOTHER_MSG_TYPE;
+	if (msgsnd(qid, &out.mtype, sizeof(ANOTHER_TEST_STRING), IPC_NOWAIT)) {
+		printf("Second message send failed (%m)\n");
+		return -errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Test driver: create a queue, fill it, dump its contents, verify and
+ * destroy it, restore it from the dump, then verify and destroy it again.
+ * Must run as root. Returns 0 on success and a non-zero value otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	struct msgque_data msgque;
+
+	if (getuid() != 0) {
+		printf("Please run the test as root - Exiting.\n");
+		exit(1);
+	}
+
+	msgque.key = ftok(argv[0], 822155650);
+	if (msgque.key == -1) {
+		/* save errno before printf() gets a chance to change it */
+		err = -errno;
+		printf("Can't make key\n");
+		return err;
+	}
+
+	msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+	if (msgque.msq_id == -1) {
+		err = -errno;
+		printf("Can't create queue\n");
+		goto err_out;
+	}
+
+	err = fill_msgque(&msgque);
+	if (err) {
+		printf("Failed to fill queue\n");
+		goto err_destroy;
+	}
+
+	err = dump_queue(&msgque);
+	if (err) {
+		printf("Failed to dump queue\n");
+		goto err_destroy;
+	}
+
+	/* check_and_destroy_queue() removes the queue itself, even on error */
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to check and destroy queue\n");
+		goto err_out;
+	}
+
+	err = restore_queue(&msgque);
+	if (err) {
+		printf("Failed to restore queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to test queue\n");
+		goto err_out;
+	}
+	return 0;
+
+err_destroy:
+	if (msgctl(msgque.msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+err_out:
+	return err;
+}
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@ -0,0 +1,28 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := x86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := x86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../arch/x86/include/generated/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../arch/x86/include/
+
+all:
+ifeq ($(ARCH),x86)
+	gcc $(CFLAGS) kcmp_test.c -o kcmp_test
+else
+	echo "Not an x86 target, can't build kcmp selftest"
+endif
+
+run_tests: all
+	@./kcmp_test || echo "kcmp_test: [FAIL]"
+
+clean:
+	$(RM) kcmp_test kcmp-test-file
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@ -0,0 +1,96 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+/* Thin wrapper that issues the kcmp system call through syscall(). */
+static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2)
+{
+	return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
+
+/*
+ * Fork a child that opens the same file as the parent, prints the result
+ * of each kcmp comparison type between parent and child, and checks that
+ * comparing a descriptor with itself and a process with itself both
+ * return 0.
+ *
+ * Returns 0 when the child ran both checks successfully, 1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	const char kpath[] = "kcmp-test-file";
+	int pid1, pid2;
+	int fd1, fd2;
+	int status;
+
+	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	pid1 = getpid();
+
+	if (fd1 < 0) {
+		perror("Can't create file");
+		exit(1);
+	}
+
+	pid2 = fork();
+	if (pid2 < 0) {
+		perror("fork failed");
+		exit(1);
+	}
+
+	if (!pid2) {
+		int child_pid = getpid();
+		int failed = 0;	/* set once any check fails, never cleared */
+		int ret;
+
+		fd2 = open(kpath, O_RDWR, 0644);
+		if (fd2 < 0) {
+			perror("Can't open file");
+			exit(1);
+		}
+
+		/* An example of output and arguments */
+		printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+		       "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+		       "INV: %2ld\n",
+		       pid1, child_pid,
+		       sys_kcmp(pid1, child_pid, KCMP_FILE,	fd1, fd2),
+		       sys_kcmp(pid1, child_pid, KCMP_FILES,	0, 0),
+		       sys_kcmp(pid1, child_pid, KCMP_VM,	0, 0),
+		       sys_kcmp(pid1, child_pid, KCMP_FS,	0, 0),
+		       sys_kcmp(pid1, child_pid, KCMP_SIGHAND,	0, 0),
+		       sys_kcmp(pid1, child_pid, KCMP_IO,	0, 0),
+		       sys_kcmp(pid1, child_pid, KCMP_SYSVSEM,	0, 0),
+
+			/* This one should fail */
+		       sys_kcmp(pid1, child_pid, KCMP_TYPES + 1, 0, 0));
+
+		/* This one should return same fd */
+		ret = sys_kcmp(pid1, child_pid, KCMP_FILE, fd1, fd1);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			failed = 1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		/* Compare with self */
+		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			failed = 1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		exit(failed);
+	}
+
+	/* waitpid() takes option flags here; 0 means a plain blocking wait */
+	if (waitpid(pid2, &status, 0) < 0) {
+		perror("waitpid failed");
+		exit(1);
+	}
+
+	/* hand the child's verdict to the caller instead of always passing */
+	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : 1;
+}
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@ -0,0 +1,20 @@
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+
+# Build the memfd test binary.
+all:
+	gcc $(CFLAGS) memfd_test.c -o memfd_test
+
+# "all" has just built memfd_test, so it is not compiled a second time here.
+run_tests: all
+	@./memfd_test || echo "memfd_test: [FAIL]"
+
+# Build the FUSE helper file-system and the test that uses it.
+build_fuse:
+	gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+	gcc $(CFLAGS) fuse_test.c -o fuse_test
+
+run_fuse: build_fuse
+	@./run_fuse_test.sh || echo "fuse_test: [FAIL]"
+
+# Remove every binary produced by "all" and "build_fuse".
+clean:
+	$(RM) memfd_test fuse_test fuse_mnt
--- a/tools/testing/selftests/memfd/fuse_mnt.c
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@ -0,0 +1,110 @@
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during reads(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+/*
+ * getattr handler: "/" is a 0755 directory with two links, memfd_path is a
+ * 0444 regular file whose size is the length of memfd_content, and any
+ * other path yields -ENOENT. *st is zeroed before it is filled in.
+ */
+static int memfd_getattr(const char *path, struct stat *st)
+{
+	memset(st, 0, sizeof(*st));
+
+	if (strcmp(path, "/") == 0) {
+		st->st_mode = S_IFDIR | 0755;
+		st->st_nlink = 2;
+		return 0;
+	}
+
+	if (strcmp(path, memfd_path) == 0) {
+		st->st_mode = S_IFREG | 0444;
+		st->st_nlink = 1;
+		st->st_size = strlen(memfd_content);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * readdir handler: only "/" can be listed; it reports ".", ".." and the
+ * memfd file (memfd_path without its leading '/').
+ */
+static int memfd_readdir(const char *path,
+			 void *buf,
+			 fuse_fill_dir_t filler,
+			 off_t offset,
+			 struct fuse_file_info *fi)
+{
+	const char *entries[] = { ".", "..", memfd_path + 1 };
+	size_t i;
+
+	if (strcmp(path, "/") != 0)
+		return -ENOENT;
+
+	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
+		filler(buf, entries[i], NULL, 0);
+
+	return 0;
+}
+
+/*
+ * open handler: only memfd_path exists (-ENOENT otherwise) and it may only
+ * be opened read-only (-EACCES otherwise). Sets direct_io on every
+ * successful open.
+ */
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+	if (strcmp(path, memfd_path))
+		return -ENOENT;
+
+	/* O_ACCMODE masks out the access-mode bits of the open flags */
+	if ((fi->flags & O_ACCMODE) != O_RDONLY)
+		return -EACCES;
+
+	/* force direct-IO */
+	fi->direct_io = 1;
+
+	return 0;
+}
+
+/*
+ * read handler: sleeps for one second, then copies up to @size bytes of
+ * memfd_content starting at @offset into @buf. Returns the number of bytes
+ * copied (0 at or past the end of the content) or -ENOENT for any path
+ * other than memfd_path.
+ */
+static int memfd_read(const char *path,
+		      char *buf,
+		      size_t size,
+		      off_t offset,
+		      struct fuse_file_info *fi)
+{
+	const size_t total = strlen(memfd_content);
+
+	if (strcmp(path, memfd_path) != 0)
+		return -ENOENT;
+
+	/* the delay is applied to every read of the file */
+	sleep(1);
+
+	/* nothing left at or beyond the end of the content */
+	if (!(offset < total))
+		return 0;
+
+	/* clamp the request to what remains after @offset */
+	if (offset + size > total)
+		size = total - offset;
+
+	memcpy(buf, memfd_content + offset, size);
+
+	return size;
+}
+
+/* Handler table passed to fuse_main(); only these four operations are set. */
+static struct fuse_operations memfd_ops = {
+	.getattr	= memfd_getattr,
+	.readdir	= memfd_readdir,
+	.open		= memfd_open,
+	.read		= memfd_read,
+};
+
+/* Pass the command line and memfd_ops to fuse_main() and return its result. */
+int main(int argc, char *argv[])
+{
+	return fuse_main(argc, argv, &memfd_ops, NULL);
+}
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@ -0,0 +1,311 @@
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+/* Keep this a multiple of 16: clone() is given stack + STACK_SIZE as the
+ * child's initial stack pointer, and an odd size such as 65535 yields a
+ * misaligned stack (fatal on e.g. AArch64). */
+#define STACK_SIZE 65536
+
+/* Raw memfd_create() wrapper issued via syscall(); returns its result. */
+static int sys_memfd_create(const char *name,
+			    unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
+/*
+ * Create a memfd named @name with @flags and resize it to @sz bytes.
+ * Aborts on any failure; returns the new file-descriptor otherwise.
+ */
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+	int fd = sys_memfd_create(name, flags);
+
+	if (fd < 0) {
+		printf("memfd_create(\"%s\", %u) failed: %m\n",
+		       name, flags);
+		abort();
+	}
+
+	if (ftruncate(fd, sz) < 0) {
+		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+		abort();
+	}
+
+	return fd;
+}
+
+/* Return the seals currently set on @fd; aborts if F_GET_SEALS fails. */
+static __u64 mfd_assert_get_seals(int fd)
+{
+	long seals = fcntl(fd, F_GET_SEALS);
+
+	if (seals >= 0)
+		return seals;
+
+	printf("GET_SEALS(%d) failed: %m\n", fd);
+	abort();
+}
+
+/* Abort unless the seals on @fd are exactly @seals. */
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+	__u64 current = mfd_assert_get_seals(fd);
+
+	if (current == seals)
+		return;
+
+	printf("%llu != %llu = GET_SEALS(%d)\n",
+	       (unsigned long long)seals, (unsigned long long)current, fd);
+	abort();
+}
+
+/* Add @seals to @fd; aborts if F_ADD_SEALS fails. */
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+	/* queried up-front, used only by the diagnostic below */
+	__u64 before = mfd_assert_get_seals(fd);
+
+	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
+		printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+		       fd, (unsigned long long)before,
+		       (unsigned long long)seals);
+		abort();
+	}
+}
+
+/*
+ * Try to add @seals to @fd. Success and failure with EBUSY are both
+ * tolerated and the fcntl() result is returned; any other error aborts.
+ */
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+	long r;
+	__u64 s = 0;
+
+	/* the current seals are needed for the error message only */
+	r = fcntl(fd, F_GET_SEALS);
+	if (r >= 0)
+		s = r;
+
+	r = fcntl(fd, F_ADD_SEALS, seals);
+	if (r >= 0 || errno == EBUSY)
+		return r;
+
+	printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+	       fd, (unsigned long long)s, (unsigned long long)seals);
+	abort();
+}
+
+/* Map the first MFD_DEF_SIZE bytes of @fd shared and read-write; aborts on
+ * failure. */
+static void *mfd_assert_mmap_shared(int fd)
+{
+	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, fd, 0);
+
+	if (map != MAP_FAILED)
+		return map;
+
+	printf("mmap() failed: %m\n");
+	abort();
+}
+
+/* Map the first MFD_DEF_SIZE bytes of @fd private and read-write; aborts
+ * on failure. */
+static void *mfd_assert_mmap_private(int fd)
+{
+	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE, fd, 0);
+
+	if (map != MAP_FAILED)
+		return map;
+
+	printf("mmap() failed: %m\n");
+	abort();
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+/*
+ * Entry point of the sealing thread started via clone(). @arg is unused;
+ * the memfd and its mapping are taken from global_mfd / global_p.
+ * Always returns 0; unexpected sealing results abort the process.
+ */
+static int sealing_thread_fn(void *arg)
+{
+	int r;
+
+	/*
+	 * This thread first waits 200ms so any pending operation in the parent
+	 * is correctly started. After that, it tries to seal @global_mfd as
+	 * SEAL_WRITE. This is expected to fail with EBUSY as the parent thread
+	 * has a read() into that memory mapped object still ongoing.
+	 * We then wait one more second and try sealing again. This time it
+	 * must succeed as there shouldn't be anyone else pinning the pages.
+	 */
+
+	/* wait 200ms for FUSE-request to be active */
+	usleep(200000);
+
+	/* unmap the mapping before sealing to avoid i_mmap_writable failures */
+	munmap(global_p, MFD_DEF_SIZE);
+
+	/* Try sealing the global file; expect EBUSY or success. Current
+	 * kernels will never succeed, but in the future, kernels might
+	 * implement page-replacements or other fancy ways to avoid racing
+	 * writes. */
+	r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+	if (r >= 0) {
+		printf("HURRAY! This kernel fixed GUP races!\n");
+	} else {
+		/* wait 1s more so the FUSE-request is done */
+		sleep(1);
+
+		/* try sealing the global file again */
+		mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+	}
+
+	return 0;
+}
+
+/*
+ * Start sealing_thread_fn() in a clone()d child that shares the address
+ * space, file-table and fs-info with the caller, and return its pid.
+ * Aborts if the stack allocation or clone() fails. The stack buffer is not
+ * freed by this function.
+ */
+static pid_t spawn_sealing_thread(void)
+{
+	uint8_t *stack;
+	pid_t pid;
+
+	stack = malloc(STACK_SIZE);
+	if (!stack) {
+		printf("malloc(STACK_SIZE) failed: %m\n");
+		abort();
+	}
+
+	/* pass the end of the buffer: the child stack is expected to grow
+	 * downwards from the given address */
+	pid = clone(sealing_thread_fn,
+		    stack + STACK_SIZE,
+		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+		    NULL);
+	if (pid < 0) {
+		printf("clone() failed: %m\n");
+		abort();
+	}
+
+	return pid;
+}
+
+/* Block until the sealing thread @pid has exited; its exit status is
+ * discarded. */
+static void join_sealing_thread(pid_t pid)
+{
+	waitpid(pid, NULL, 0);
+}
+
+/*
+ * Race a delayed read() into a memory-mapped memfd against F_ADD_SEALS.
+ * argv[1] is the path of a file inside the fuse_mnt mount-point. Any
+ * violated expectation aborts the process; returns 0 on success.
+ */
+int main(int argc, char **argv)
+{
+	static const char zero[MFD_DEF_SIZE];
+	int fd, mfd, r;
+	void *p;
+	int was_sealed;
+	pid_t pid;
+
+	if (argc < 2) {
+		printf("error: please pass path to file in fuse_mnt mount-point\n");
+		abort();
+	}
+
+	/* open FUSE memfd file for GUP testing */
+	printf("opening: %s\n", argv[1]);
+	fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		printf("cannot open(\"%s\"): %m\n", argv[1]);
+		abort();
+	}
+
+	/* create new memfd-object */
+	mfd = mfd_assert_new("kern_memfd_fuse",
+			     MFD_DEF_SIZE,
+			     MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	/* mmap memfd-object for writing */
+	p = mfd_assert_mmap_shared(mfd);
+
+	/* pass mfd+mapping to a separate sealing-thread which tries to seal
+	 * the memfd objects with SEAL_WRITE while we write into it */
+	global_mfd = mfd;
+	global_p = p;
+	pid = spawn_sealing_thread();
+
+	/* Use read() on the FUSE file to read into our memory-mapped memfd
+	 * object. This races the other thread which tries to seal the
+	 * memfd-object.
+	 * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+	 * This guarantees that the receive-buffer is pinned for 1s until the
+	 * data is written into it. The racing ADD_SEALS should thus fail as
+	 * the pages are still pinned. */
+	r = read(fd, p, MFD_DEF_SIZE);
+	if (r < 0) {
+		printf("read() failed: %m\n");
+		abort();
+	} else if (!r) {
+		printf("unexpected EOF on read()\n");
+		abort();
+	}
+
+	/* sample the seal state right after read() returned, before the
+	 * sealing-thread's second attempt is waited for */
+	was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+	/* Wait for sealing-thread to finish and verify that it
+	 * successfully sealed the file after the second try. */
+	join_sealing_thread(pid);
+	mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+	/* *IF* the memfd-object was sealed at the time our read() returned,
+	 * then the kernel did a page-replacement or canceled the read() (or
+	 * whatever magic it did..). In that case, the memfd object is still
+	 * all zero.
+	 * In case the memfd-object was *not* sealed, the read() was successful
+	 * and the memfd object must *not* be all zero.
+	 * Note that in real scenarios, there might be a mixture of both, but
+	 * in this test-case, we have explicit 200ms delays which should be
+	 * enough to avoid any in-flight writes. */
+
+	p = mfd_assert_mmap_private(mfd);
+	if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
+		printf("memfd sealed during read() but data not discarded\n");
+		abort();
+	} else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
+		printf("memfd sealed after read() but data discarded\n");
+		abort();
+	}
+
+	close(mfd);
+	close(fd);
+
+	printf("fuse: DONE\n");
+
+	return 0;
+}
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@ -0,0 +1,911 @@
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+/* Keep this a multiple of 16: clone() is given stack + STACK_SIZE as the
+ * child's initial stack pointer, and an odd size such as 65535 yields a
+ * misaligned stack (fatal on e.g. AArch64). */
+#define STACK_SIZE 65536
+
+/* Raw memfd_create() wrapper issued via syscall(); returns its result. */
+static int sys_memfd_create(const char *name,
+			    unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
+/*
+ * Create a memfd named @name with @flags and resize it to @sz bytes.
+ * Aborts on any failure; returns the new file-descriptor otherwise.
+ */
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+	int fd = sys_memfd_create(name, flags);
+
+	if (fd < 0) {
+		printf("memfd_create(\"%s\", %u) failed: %m\n",
+		       name, flags);
+		abort();
+	}
+
+	if (ftruncate(fd, sz) < 0) {
+		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+		abort();
+	}
+
+	return fd;
+}
+
+/* Expect memfd_create(@name, @flags) to fail; aborts if it succeeds. */
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+	int fd = sys_memfd_create(name, flags);
+
+	if (fd < 0)
+		return;
+
+	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+	       name, flags);
+	close(fd);
+	abort();
+}
+
+/* Return the seals currently set on @fd; aborts if F_GET_SEALS fails. */
+static unsigned int mfd_assert_get_seals(int fd)
+{
+	int seals = fcntl(fd, F_GET_SEALS);
+
+	if (seals >= 0)
+		return (unsigned int)seals;
+
+	printf("GET_SEALS(%d) failed: %m\n", fd);
+	abort();
+}
+
+/* Abort unless the seals on @fd are exactly @seals. */
+static void mfd_assert_has_seals(int fd, unsigned int seals)
+{
+	unsigned int current = mfd_assert_get_seals(fd);
+
+	if (current == seals)
+		return;
+
+	printf("%u != %u = GET_SEALS(%d)\n", seals, current, fd);
+	abort();
+}
+
+/* Add @seals to @fd; aborts if F_ADD_SEALS fails. */
+static void mfd_assert_add_seals(int fd, unsigned int seals)
+{
+	/* queried up-front, used only by the diagnostic below */
+	unsigned int before = mfd_assert_get_seals(fd);
+
+	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
+		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, before, seals);
+		abort();
+	}
+}
+
+/* Expect adding @seals to @fd to fail; aborts if F_ADD_SEALS succeeds. */
+static void mfd_fail_add_seals(int fd, unsigned int seals)
+{
+	unsigned int before = 0;
+	int r;
+
+	/* the current seals are needed for the error message only, so a
+	 * failing F_GET_SEALS is tolerated here */
+	r = fcntl(fd, F_GET_SEALS);
+	if (r >= 0)
+		before = (unsigned int)r;
+
+	if (fcntl(fd, F_ADD_SEALS, seals) < 0)
+		return;
+
+	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
+			fd, before, seals);
+	abort();
+}
+
+/* Abort unless fstat() reports exactly @size bytes for @fd. */
+static void mfd_assert_size(int fd, size_t size)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		printf("fstat(%d) failed: %m\n", fd);
+		abort();
+	}
+
+	if (st.st_size != size) {
+		printf("wrong file size %lld, but expected %lld\n",
+		       (long long)st.st_size, (long long)size);
+		abort();
+	}
+}
+
+/* Duplicate @fd with dup(); aborts on failure, returns the new fd. */
+static int mfd_assert_dup(int fd)
+{
+	int copy = dup(fd);
+
+	if (copy >= 0)
+		return copy;
+
+	printf("dup(%d) failed: %m\n", fd);
+	abort();
+}
+
+/* Map the first MFD_DEF_SIZE bytes of @fd shared and read-write; aborts on
+ * failure. */
+static void *mfd_assert_mmap_shared(int fd)
+{
+	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, fd, 0);
+
+	if (map != MAP_FAILED)
+		return map;
+
+	printf("mmap() failed: %m\n");
+	abort();
+}
+
+/* Map the first MFD_DEF_SIZE bytes of @fd private and read-only; aborts on
+ * failure. */
+static void *mfd_assert_mmap_private(int fd)
+{
+	void *map = mmap(NULL, MFD_DEF_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+
+	if (map != MAP_FAILED)
+		return map;
+
+	printf("mmap() failed: %m\n");
+	abort();
+}
+
+/*
+ * Re-open @fd through /proc/self/fd/<fd> with @flags and @mode.
+ * Aborts on failure; returns the newly opened file-descriptor.
+ */
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+	char proc_path[512];
+	int opened;
+
+	sprintf(proc_path, "/proc/self/fd/%d", fd);
+
+	opened = open(proc_path, flags, mode);
+	if (opened >= 0)
+		return opened;
+
+	printf("open(%s) failed: %m\n", proc_path);
+	abort();
+}
+
+/* Expect re-opening @fd through /proc/self/fd/<fd> with @flags and @mode
+ * to fail; aborts if the open() succeeds. */
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+	char proc_path[512];
+
+	sprintf(proc_path, "/proc/self/fd/%d", fd);
+
+	if (open(proc_path, flags, mode) < 0)
+		return;
+
+	printf("open(%s) didn't fail as expected\n", proc_path);
+	abort();
+}
+
+/*
+ * Verify that @fd is readable: a 16 byte read() must succeed in full and
+ * both a read-only and a writable MAP_PRIVATE mapping must be possible.
+ * Aborts on the first failure.
+ */
+static void mfd_assert_read(int fd)
+{
+	char data[16];
+	void *map;
+	ssize_t n;
+
+	n = read(fd, data, sizeof(data));
+	if (n != sizeof(data)) {
+		printf("read() failed: %m\n");
+		abort();
+	}
+
+	/* verify PROT_READ *is* allowed */
+	map = mmap(NULL, MFD_DEF_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	munmap(map, MFD_DEF_SIZE);
+
+	/* verify MAP_PRIVATE is *always* allowed (even writable) */
+	map = mmap(NULL, MFD_DEF_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+		   fd, 0);
+	if (map == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	munmap(map, MFD_DEF_SIZE);
+}
+
+/*
+ * Verify that @fd is writable through every path exercised here: write(),
+ * writable shared mappings, mprotect() upgrading a read-only shared mapping,
+ * and hole-punching. Aborts on the first failure.
+ */
+static void mfd_assert_write(int fd)
+{
+	ssize_t l;
+	void *p;
+	int r;
+
+	/* verify write() succeeds */
+	l = write(fd, "\0\0\0\0", 4);
+	if (l != 4) {
+		printf("write() failed: %m\n");
+		abort();
+	}
+
+	/* verify PROT_READ | PROT_WRITE is allowed */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	/* actually store through the mapping, not just create it */
+	*(char *)p = 0;
+	munmap(p, MFD_DEF_SIZE);
+
+	/* verify PROT_WRITE is allowed */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	*(char *)p = 0;
+	munmap(p, MFD_DEF_SIZE);
+
+	/* verify PROT_READ with MAP_SHARED is allowed and a following
+	 * mprotect(PROT_WRITE) allows writing */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+	if (r < 0) {
+		printf("mprotect() failed: %m\n");
+		abort();
+	}
+
+	*(char *)p = 0;
+	munmap(p, MFD_DEF_SIZE);
+
+	/* verify PUNCH_HOLE works */
+	r = fallocate(fd,
+		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+		      0,
+		      MFD_DEF_SIZE);
+	if (r < 0) {
+		printf("fallocate(PUNCH_HOLE) failed: %m\n");
+		abort();
+	}
+}
+
+/*
+ * Verify that @fd can NOT be modified: write() must fail with EPERM,
+ * writable shared mappings must be refused, a read-only shared mapping must
+ * not be upgradable via mprotect(), and hole-punching must fail.
+ * Aborts on the first operation that unexpectedly succeeds.
+ */
+static void mfd_fail_write(int fd)
+{
+	ssize_t l;
+	void *p;
+	int r;
+
+	/* verify write() fails; write() signals errors by returning -1 with
+	 * the reason in errno, so check errno instead of comparing the
+	 * return value against -EPERM */
+	l = write(fd, "data", 4);
+	if (l >= 0 || errno != EPERM) {
+		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+		abort();
+	}
+
+	/* verify PROT_READ | PROT_WRITE is not allowed */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		printf("mmap() didn't fail as expected\n");
+		abort();
+	}
+
+	/* verify PROT_WRITE is not allowed */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		printf("mmap() didn't fail as expected\n");
+		abort();
+	}
+
+	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+	 * allowed. Note that for r/w the kernel already prevents the mmap. */
+	p = mmap(NULL,
+		 MFD_DEF_SIZE,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+		if (r >= 0) {
+			printf("mmap()+mprotect() didn't fail as expected\n");
+			abort();
+		}
+		/* don't leave the read-only mapping behind */
+		munmap(p, MFD_DEF_SIZE);
+	}
+
+	/* verify PUNCH_HOLE fails */
+	r = fallocate(fd,
+		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+		      0,
+		      MFD_DEF_SIZE);
+	if (r >= 0) {
+		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+		abort();
+	}
+}
+
+/*
+ * Verify that @fd can be shrunk: first via ftruncate() to half of
+ * MFD_DEF_SIZE, then to zero by re-opening it with O_TRUNC.
+ */
+static void mfd_assert_shrink(int fd)
+{
+	int reopened;
+
+	if (ftruncate(fd, MFD_DEF_SIZE / 2) < 0) {
+		printf("ftruncate(SHRINK) failed: %m\n");
+		abort();
+	}
+	mfd_assert_size(fd, MFD_DEF_SIZE / 2);
+
+	/* O_TRUNC on a fresh open() truncates the file as well */
+	reopened = mfd_assert_open(fd,
+				   O_RDWR | O_CREAT | O_TRUNC,
+				   S_IRUSR | S_IWUSR);
+	close(reopened);
+	mfd_assert_size(fd, 0);
+}
+
+/* Verify that @fd can NOT be shrunk, neither via ftruncate() nor by
+ * re-opening it with O_TRUNC. */
+static void mfd_fail_shrink(int fd)
+{
+	if (ftruncate(fd, MFD_DEF_SIZE / 2) >= 0) {
+		printf("ftruncate(SHRINK) didn't fail as expected\n");
+		abort();
+	}
+
+	mfd_fail_open(fd, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+}
+
+/*
+ * Verify that @fd can grow: ftruncate() to twice MFD_DEF_SIZE, then
+ * fallocate() up to four times MFD_DEF_SIZE, checking the size after each.
+ */
+static void mfd_assert_grow(int fd)
+{
+	if (ftruncate(fd, MFD_DEF_SIZE * 2) < 0) {
+		printf("ftruncate(GROW) failed: %m\n");
+		abort();
+	}
+	mfd_assert_size(fd, MFD_DEF_SIZE * 2);
+
+	if (fallocate(fd, 0, 0, MFD_DEF_SIZE * 4) < 0) {
+		printf("fallocate(ALLOC) failed: %m\n");
+		abort();
+	}
+	mfd_assert_size(fd, MFD_DEF_SIZE * 4);
+}
+
+/* Verify that @fd can NOT grow, neither via ftruncate() nor via
+ * fallocate(). */
+static void mfd_fail_grow(int fd)
+{
+	if (ftruncate(fd, MFD_DEF_SIZE * 2) >= 0) {
+		printf("ftruncate(GROW) didn't fail as expected\n");
+		abort();
+	}
+
+	if (fallocate(fd, 0, 0, MFD_DEF_SIZE * 4) >= 0) {
+		printf("fallocate(ALLOC) didn't fail as expected\n");
+		abort();
+	}
+}
+
+/* Verify that a pwrite() of eight times MFD_DEF_SIZE at offset 0 succeeds
+ * in full and leaves @fd with exactly that size. */
+static void mfd_assert_grow_write(int fd)
+{
+	static char payload[MFD_DEF_SIZE * 8];
+	ssize_t written = pwrite(fd, payload, sizeof(payload), 0);
+
+	if (written != sizeof(payload)) {
+		printf("pwrite() failed: %m\n");
+		abort();
+	}
+
+	mfd_assert_size(fd, MFD_DEF_SIZE * 8);
+}
+
+/* Verify that a pwrite() of eight times MFD_DEF_SIZE at offset 0 does NOT
+ * complete in full; aborts if every byte was written. */
+static void mfd_fail_grow_write(int fd)
+{
+	static char payload[MFD_DEF_SIZE * 8];
+	ssize_t written = pwrite(fd, payload, sizeof(payload), 0);
+
+	if (written != sizeof(payload))
+		return;
+
+	printf("pwrite() didn't fail as expected\n");
+	abort();
+}
+
+/*
+ * Entry point of the idle helper started via clone(): it does nothing but
+ * wait for SIGTERM. @arg is unused. Returns 0 if sigwait() returns.
+ */
+static int idle_thread_fn(void *arg)
+{
+	sigset_t set;
+	int sig;
+
+	/* dummy waiter; SIGTERM terminates us anyway */
+	sigemptyset(&set);
+	sigaddset(&set, SIGTERM);
+	sigwait(&set, &sig);
+
+	return 0;
+}
+
+/*
+ * Start idle_thread_fn() in a clone()d child and return its pid.
+ * @flags are extra CLONE_* flags OR-ed with SIGCHLD; with 0 nothing is
+ * shared with the caller. Aborts if the stack allocation or clone() fails.
+ * The stack buffer is not freed by this function.
+ */
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+	uint8_t *stack;
+	pid_t pid;
+
+	stack = malloc(STACK_SIZE);
+	if (!stack) {
+		printf("malloc(STACK_SIZE) failed: %m\n");
+		abort();
+	}
+
+	/* pass the end of the buffer: the child stack is expected to grow
+	 * downwards from the given address */
+	pid = clone(idle_thread_fn,
+		    stack + STACK_SIZE,
+		    SIGCHLD | flags,
+		    NULL);
+	if (pid < 0) {
+		printf("clone() failed: %m\n");
+		abort();
+	}
+
+	return pid;
+}
+
+/* Stop the idle helper @pid with SIGTERM and wait for it to exit; its exit
+ * status is discarded. */
+static void join_idle_thread(pid_t pid)
+{
+	kill(pid, SIGTERM);
+	waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+	char buf[2048];
+	int fd;
+
+	/* test NULL name */
+	mfd_fail_new(NULL, 0);
+
+	/* test over-long name (not zero-terminated) */
+	memset(buf, 0xff, sizeof(buf));
+	mfd_fail_new(buf, 0);
+
+	/* test over-long zero-terminated name */
+	memset(buf, 0xff, sizeof(buf));
+	buf[sizeof(buf) - 1] = 0;
+	mfd_fail_new(buf, 0);
+
+	/* verify "" is a valid name */
+	fd = mfd_assert_new("", 0, 0);
+	close(fd);
+
+	/* verify invalid flag values are rejected (everything other than
+	 * the MFD_CLOEXEC / MFD_ALLOW_SEALING combinations accepted below) */
+	mfd_fail_new("", 0x0100);
+	mfd_fail_new("", ~MFD_CLOEXEC);
+	mfd_fail_new("", ~MFD_ALLOW_SEALING);
+	mfd_fail_new("", ~0);
+	mfd_fail_new("", 0x80000000U);
+
+	/* verify MFD_CLOEXEC is allowed */
+	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+	close(fd);
+
+	/* verify MFD_ALLOW_SEALING is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+	close(fd);
+
+	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+	close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+	int fd;
+
+	fd = mfd_assert_new("kern_memfd_basic",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	/* add basic seals */
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+
+	/* add them again; re-adding existing seals must succeed and leave
+	 * the seal set unchanged */
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+
+	/* add more seals and seal against sealing */
+	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_GROW |
+				 F_SEAL_WRITE |
+				 F_SEAL_SEAL);
+
+	/* verify that sealing no longer works, not even for a seal that is
+	 * already set or for an empty seal mask */
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	mfd_fail_add_seals(fd, 0);
+
+	close(fd);
+
+	/* verify sealing does not work without MFD_ALLOW_SEALING; such a
+	 * memfd is expected to start out with F_SEAL_SEAL set */
+	fd = mfd_assert_new("kern_memfd_basic",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+			       F_SEAL_GROW |
+			       F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+	close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+	int fd;
+
+	fd = mfd_assert_new("kern_memfd_seal_write",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+	/* with only SEAL_WRITE set: reading and resizing must still work,
+	 * modifying the content (including a growing pwrite) must not */
+	mfd_assert_read(fd);
+	mfd_fail_write(fd);
+	mfd_assert_shrink(fd);
+	mfd_assert_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+	int fd;
+
+	fd = mfd_assert_new("kern_memfd_seal_shrink",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+	/* with only SEAL_SHRINK set: everything but shrinking must work */
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_fail_shrink(fd);
+	mfd_assert_grow(fd);
+	mfd_assert_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+	int fd;
+
+	fd = mfd_assert_new("kern_memfd_seal_grow",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_GROW);
+	mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+	/* with only SEAL_GROW set: reading, writing and shrinking must work,
+	 * growing (explicitly or via pwrite) must not */
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_assert_shrink(fd);
+	mfd_fail_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+	int fd;
+
+	fd = mfd_assert_new("kern_memfd_seal_resize",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+	/* with both resize seals set: content access must work, any size
+	 * change must not */
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_fail_shrink(fd);
+	mfd_fail_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(void)
+{
+	int fd, fd2;
+
+	fd = mfd_assert_new("kern_memfd_share_dup",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	fd2 = mfd_assert_dup(fd);
+	mfd_assert_has_seals(fd2, 0);
+
+	/* a seal added through one descriptor must show up on the other */
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	mfd_assert_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+	/* once SEAL_SEAL is set, neither descriptor may add seals */
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	mfd_fail_add_seals(fd2, F_SEAL_GROW);
+	mfd_fail_add_seals(fd, F_SEAL_SEAL);
+	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+	close(fd2);
+
+	/* closing the duplicate must not lift the restriction */
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(void)
+{
+	int fd;
+	void *p;
+
+	fd = mfd_assert_new("kern_memfd_share_mmap",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	/* shared/writable ref prevents sealing WRITE, but allows others */
+	p = mfd_assert_mmap_shared(fd);
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+	munmap(p, MFD_DEF_SIZE);
+
+	/* readable ref allows sealing (the private mapping is still in
+	 * place while SEAL_WRITE is added) */
+	p = mfd_assert_mmap_private(fd);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	munmap(p, MFD_DEF_SIZE);
+
+	close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(void)
+{
+	int fd, fd2;
+
+	fd = mfd_assert_new("kern_memfd_share_open",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	/* seals added via either the original or the re-opened descriptor
+	 * must be visible through both */
+	fd2 = mfd_assert_open(fd, O_RDWR, 0);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	/* replace fd by a read-only re-open of the same memfd */
+	close(fd);
+	fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+	/* adding seals through the O_RDONLY descriptor must fail */
+	mfd_fail_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	/* a fresh O_RDWR re-open (via the read-only fd) may seal again */
+	close(fd2);
+	fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+	close(fd2);
+	close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked childs.
+ */
+static void test_share_fork(void)
+{
+	int fd;
+	pid_t pid;
+
+	fd = mfd_assert_new("kern_memfd_share_fork",
+			    MFD_DEF_SIZE,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	/* the child is created with no extra CLONE_* flags while the memfd
+	 * is already open; sealing below happens while it is alive */
+	pid = spawn_idle_thread(0);
+	mfd_assert_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	join_idle_thread(pid);
+
+	/* the result must be the same after the child is gone */
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	close(fd);
+}
+
+/*
+ * Run every memfd test once, then repeat the sharing tests while an idle
+ * helper shares our file-table, fs-info and address space. Each test aborts
+ * the process on failure; reaching the end returns 0.
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid;
+
+	printf("memfd: CREATE\n");
+	test_create();
+	printf("memfd: BASIC\n");
+	test_basic();
+
+	printf("memfd: SEAL-WRITE\n");
+	test_seal_write();
+	printf("memfd: SEAL-SHRINK\n");
+	test_seal_shrink();
+	printf("memfd: SEAL-GROW\n");
+	test_seal_grow();
+	printf("memfd: SEAL-RESIZE\n");
+	test_seal_resize();
+
+	printf("memfd: SHARE-DUP\n");
+	test_share_dup();
+	printf("memfd: SHARE-MMAP\n");
+	test_share_mmap();
+	printf("memfd: SHARE-OPEN\n");
+	test_share_open();
+	printf("memfd: SHARE-FORK\n");
+	test_share_fork();
+
+	/* Run test-suite in a multi-threaded environment with a shared
+	 * file-table. */
+	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+	printf("memfd: SHARE-DUP (shared file-table)\n");
+	test_share_dup();
+	printf("memfd: SHARE-MMAP (shared file-table)\n");
+	test_share_mmap();
+	printf("memfd: SHARE-OPEN (shared file-table)\n");
+	test_share_open();
+	printf("memfd: SHARE-FORK (shared file-table)\n");
+	test_share_fork();
+	join_idle_thread(pid);
+
+	printf("memfd: DONE\n");
+
+	return 0;
+}
--- a/tools/testing/selftests/memfd/run_fuse_test.sh
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@ -0,0 +1,14 @@
+#!/bin/sh
+
+# Remove a mount-point left over from an earlier run. Failures are ignored
+# here on purpose: "set -e" is only enabled afterwards.
+if [ -d "./mnt" ]; then
+	fusermount -u ./mnt
+	rmdir ./mnt
+fi
+
+# From here on, stop at the first failing command.
+set -e
+
+# Mount the fake FUSE file-system, run the test against its file, clean up.
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd
+fusermount -u ./mnt
+rmdir ./mnt
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@ -0,0 +1,9 @@
+# Nothing to build; the tests are driven by on-off-test.sh.
+all:
+
+# Quick run: offline roughly 2% of the hot-pluggable memory.
+run_tests:
+	@/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
+
+# Full run with the script's default ratio.
+run_full_test:
+	@/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+
+clean:
+
+# None of the targets creates a file of its own name.
+.PHONY: all run_tests run_full_test clean
--- a/tools/testing/selftests/memory-hotplug/on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/on-off-test.sh
@ -0,0 +1,238 @@
+#!/bin/bash
+
+SYSFS=
+
+# Check that the basic tests can run at all and set SYSFS to the sysfs
+# mount-point. An unmet requirement prints a "skip all tests:" message and
+# exits with status 0.
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit 0
+	fi
+
+	# third field of the first sysfs line printed by mount
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit 0
+	fi
+
+	if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
+		echo $msg memory hotplug is not supported >&2
+		exit 0
+	fi
+}
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+	# $1: pattern the block's state file must match (default: anything)
+	local state=${1:-.\*}
+
+	for memory in $SYSFS/devices/system/memory/memory*; do
+		if grep -q 1 $memory/removable &&
+		   grep -q $state $memory/state; then
+			# print only what follows ".../memory" in the path
+			echo ${memory##/*/memory}
+		fi
+	done
+}
+
+# List hot-pluggable memory blocks whose state matches "offline".
+hotplaggable_offline_memory()
+{
+	hotpluggable_memory offline
+}
+
+# List hot-pluggable memory blocks whose state matches "online".
+hotpluggable_online_memory()
+{
+	hotpluggable_memory online
+}
+
+# Succeed if the state file of memory block $1 contains "online".
+memory_is_online()
+{
+	grep -q online $SYSFS/devices/system/memory/memory$1/state
+}
+
+# Succeed if the state file of memory block $1 contains "offline".
+memory_is_offline()
+{
+	grep -q offline $SYSFS/devices/system/memory/memory$1/state
+}
+
+# Request memory block $1 to go online by writing to its state file.
+online_memory()
+{
+	echo online > $SYSFS/devices/system/memory/memory$1/state
+}
+
+# Request memory block $1 to go offline by writing to its state file.
+offline_memory()
+{
+	echo offline > $SYSFS/devices/system/memory/memory$1/state
+}
+
+# Online memory block $1 and complain on stderr if the request fails or the
+# block is not online afterwards. Problems are reported, not fatal.
+online_memory_expect_success()
+{
+	local memory=$1
+
+	if ! online_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+	fi
+}
+
+# Try to online memory block $1 expecting the request to be rejected;
+# complain on stderr if it succeeds or the block is not offline afterwards.
+online_memory_expect_fail()
+{
+	local memory=$1
+
+	# the expected write error is silenced
+	if online_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+	elif ! memory_is_offline $memory; then
+		echo $FUNCNAME $memory: unexpected online >&2
+	fi
+}
+
+# Offline memory block $1 and complain on stderr if the request fails or the
+# block is not offline afterwards. Problems are reported, not fatal.
+offline_memory_expect_success()
+{
+	local memory=$1
+
+	if ! offline_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+	elif ! memory_is_offline $memory; then
+		# the block did not go offline, i.e. it is still online
+		echo $FUNCNAME $memory: unexpected online >&2
+	fi
+}
+
+# Try to offline memory block $1 expecting the request to be rejected;
+# complain on stderr if it succeeds or the block is not online afterwards.
+offline_memory_expect_fail()
+{
+	local memory=$1
+
+	# the expected write error is silenced
+	if offline_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+	fi
+}
+
+# Defaults, overridable on the command line:
+#   error    - value written to the notifier error-injection files (-e)
+#   priority - passed to the error-injection module as priority= (-p)
+#   ratio    - percent chance for each online block to be offlined (-r)
+error=-12
+priority=0
+ratio=10
+
+while getopts e:hp:r: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	h)
+		echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
+		exit
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	r)
+		ratio=$OPTARG
+		;;
+	esac
+done
+
+# only negative values down to -4095 are accepted as injected error
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+prerequisite
+
+echo "Test scope: $ratio% hotplug memory"
+echo -e "\t online all hotplug memory in offline state"
+echo -e "\t offline $ratio% hotplug memory in online state"
+echo -e "\t online all hotplug memory in offline state"
+
+#
+# Online all hot-pluggable memory
+#
+for memory in `hotplaggable_offline_memory`; do
+	echo offline-online $memory
+	online_memory_expect_success $memory
+done
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+for memory in `hotpluggable_online_memory`; do
+	if [ $((RANDOM % 100)) -lt $ratio ]; then
+		echo online-offline $memory
+		offline_memory_expect_success $memory
+	fi
+done
+
+#
+# Online all hot-pluggable memory again
+#
+for memory in `hotplaggable_offline_memory`; do
+	echo offline-online $memory
+	online_memory_expect_success $memory
+done
+
+#
+# Test with memory notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
+
+# Check that the error-injection tests can run: reload the
+# memory-notifier-error-inject module with the chosen priority, then require
+# debugfs and the module's directory. An unmet requirement prints a
+# "skip extra tests:" message and exits with status 0.
+prerequisite_extra()
+{
+	msg="skip extra tests:"
+
+	# unload first so the priority= parameter takes effect on load
+	/sbin/modprobe -q -r memory-notifier-error-inject
+	/sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+	if [ ! -d "$DEBUGFS" ]; then
+		echo $msg debugfs is not mounted >&2
+		exit 0
+	fi
+
+	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+		echo $msg memory-notifier-error-inject module is not available >&2
+		exit 0
+	fi
+}
+
+prerequisite_extra
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+	if [ $((RANDOM % 100)) -lt $ratio ]; then
+		offline_memory_expect_success $memory
+	fi
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotplaggable_offline_memory`; do
+	online_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotplaggable_offline_memory`; do
+	online_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+	offline_memory_expect_fail $memory
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+/sbin/modprobe -q -r memory-notifier-error-inject
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@ -0,0 +1,17 @@
+# Makefile for mount selftests.
+
+all: unprivileged-remount-test
+
+unprivileged-remount-test: unprivileged-remount-test.c
+	gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+
+# Allow specific tests to be selected.
+# The test is only run when the kernel exposes /proc/self/uid_map.
+test_unprivileged_remount: unprivileged-remount-test
+	@if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+
+run_tests: all test_unprivileged_remount
+
+clean:
+	rm -f unprivileged-remount-test
+
+# None of these targets produce a file of the same name.
+.PHONY: all test_unprivileged_remount run_tests clean
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@ -0,0 +1,370 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+#ifndef CLONE_NEWUTS
+# define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWIPC
+# define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+# define CLONE_NEWNET 0x40000000
+#endif
+#ifndef CLONE_NEWUSER
+# define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWPID
+# define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
+#ifndef MS_RELATIME
+# define MS_RELATIME (1 << 21)
+#endif
+#ifndef MS_STRICTATIME
+# define MS_STRICTATIME (1 << 24)
+#endif
+
+/*
+ * die - report a fatal error and terminate the process.
+ * @fmt: printf-style format string, followed by its arguments.
+ *
+ * The formatted message is written to stderr and the process exits with
+ * EXIT_FAILURE.  The function never returns; the attributes let the
+ * compiler check the format arguments at every call site.
+ */
+static void __attribute__((noreturn, format(printf, 1, 2)))
+die(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * vmaybe_write_file - format a string and write it to an existing file.
+ * @enoent_ok: when true, a missing file (ENOENT on open) is silently ignored.
+ * @filename:  path opened with O_WRONLY.
+ * @fmt, @ap:  printf-style format and its argument list.
+ *
+ * The text is formatted into a 4096-byte buffer and written with a single
+ * write().  Formatting errors, truncation, open/write/close failures and
+ * short writes are all fatal (see die()).
+ */
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+	char text[4096];
+	ssize_t nwritten;
+	int text_len;
+	int fd;
+
+	text_len = vsnprintf(text, sizeof(text), fmt, ap);
+	if (text_len < 0)
+		die("vsnprintf failed: %s\n", strerror(errno));
+	if (text_len >= sizeof(text))
+		die("vsnprintf output truncated\n");
+
+	fd = open(filename, O_WRONLY);
+	if (fd < 0) {
+		if (enoent_ok && (errno == ENOENT))
+			return;
+		die("open of %s failed: %s\n", filename, strerror(errno));
+	}
+
+	nwritten = write(fd, text, text_len);
+	if (nwritten < 0)
+		die("write to %s failed: %s\n", filename, strerror(errno));
+	if (nwritten != text_len)
+		die("short write to %s\n", filename);
+
+	if (close(fd) != 0)
+		die("close of %s failed: %s\n", filename, strerror(errno));
+}
+
+/*
+ * Write a formatted string to @filename; a file that does not exist
+ * is not treated as an error.
+ */
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmaybe_write_file(true, filename, fmt, args);
+	va_end(args);
+}
+
+/*
+ * Write a formatted string to @filename; the file must already exist,
+ * otherwise the failure to open it is fatal.
+ */
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmaybe_write_file(false, filename, fmt, args);
+	va_end(args);
+}
+
+/*
+ * read_mnt_flags - return the mount flags of the filesystem holding @path.
+ *
+ * The ST_* bits reported by statvfs() are translated into the matching
+ * MS_* bits understood by mount(2).  A statvfs() failure, or any ST_* bit
+ * that is not in the translation table, is fatal (see die()).
+ *
+ * The table keeps each ST_* flag next to its MS_* counterpart so the two
+ * namespaces cannot be mixed up (the mandatory-locking bit was previously
+ * OR-ed in as ST_MANDLOCK instead of MS_MANDLOCK).
+ */
+static int read_mnt_flags(const char *path)
+{
+	static const struct {
+		unsigned long st_flag;
+		int ms_flag;
+	} flag_map[] = {
+		{ ST_RDONLY,		MS_RDONLY },
+		{ ST_NOSUID,		MS_NOSUID },
+		{ ST_NODEV,		MS_NODEV },
+		{ ST_NOEXEC,		MS_NOEXEC },
+		{ ST_NOATIME,		MS_NOATIME },
+		{ ST_NODIRATIME,	MS_NODIRATIME },
+		{ ST_RELATIME,		MS_RELATIME },
+		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
+		{ ST_MANDLOCK,		MS_MANDLOCK },
+	};
+	const size_t nr_flags = sizeof(flag_map) / sizeof(flag_map[0]);
+	struct statvfs stat;
+	unsigned long known = 0;
+	int mnt_flags = 0;
+	size_t i;
+
+	if (statvfs(path, &stat) != 0) {
+		die("statvfs of %s failed: %s\n",
+			path, strerror(errno));
+	}
+
+	/* Refuse to continue if the kernel reports a flag we cannot map. */
+	for (i = 0; i < nr_flags; i++)
+		known |= flag_map[i].st_flag;
+	if (stat.f_flag & ~known) {
+		die("Unrecognized mount flags\n");
+	}
+
+	for (i = 0; i < nr_flags; i++) {
+		if (stat.f_flag & flag_map[i].st_flag)
+			mnt_flags |= flag_map[i].ms_flag;
+	}
+
+	return mnt_flags;
+}
+
+/*
+ * Unshare into a new user namespace and become uid 0 / gid 0 inside it.
+ *
+ * The caller's current uid and gid are sampled first and then mapped to 0
+ * through /proc/self/uid_map and /proc/self/gid_map.  "deny" is written
+ * to /proc/self/setgroups when that file exists.  Every failure is fatal.
+ */
+static void create_and_enter_userns(void)
+{
+	uid_t outer_uid = getuid();
+	gid_t outer_gid = getgid();
+
+	if (unshare(CLONE_NEWUSER) != 0)
+		die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));
+
+	maybe_write_file("/proc/self/setgroups", "deny");
+	write_file("/proc/self/uid_map", "0 %d 1", outer_uid);
+	write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+	if (setgid(0) != 0)
+		die("setgid(0) failed %s\n", strerror(errno));
+	if (setuid(0) != 0)
+		die("setuid(0) failed %s\n", strerror(errno));
+}
+
+/*
+ * test_unpriv_remount - check bind-remount behaviour from a nested userns.
+ * @fstype, @mount_options, @mount_flags: how "testing" is mounted on /tmp.
+ * @remount_flags: flags for a remount that is expected to succeed.
+ * @invalid_flags: flags for a remount that is expected to be refused.
+ *
+ * The work happens in a forked child: it enters a user + mount namespace,
+ * mounts the filesystem, enters a second user + mount namespace and then
+ * attempts both remounts.  The parent returns true when the child exits
+ * with EXIT_SUCCESS.
+ */
+static
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+			 int mount_flags, int remount_flags, int invalid_flags)
+{
+	pid_t child = fork();
+
+	if (child == -1)
+		die("fork failed: %s\n", strerror(errno));
+
+	if (child != 0) { /* parent */
+		int status;
+		pid_t reaped = waitpid(child, &status, 0);
+
+		if (reaped == -1)
+			die("waitpid failed: %s\n", strerror(errno));
+		if (reaped != child)
+			die("waited for %d got %d\n", child, reaped);
+		if (!WIFEXITED(status))
+			die("child did not terminate cleanly\n");
+		return WEXITSTATUS(status) == EXIT_SUCCESS;
+	}
+
+	/* child: first namespace level, where the filesystem is mounted */
+	create_and_enter_userns();
+	if (unshare(CLONE_NEWNS) != 0)
+		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
+
+	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+		die("mount of %s with options '%s' on /tmp failed: %s\n",
+		    fstype,
+		    mount_options ? mount_options : "",
+		    strerror(errno));
+	}
+
+	/* child: second namespace level, from which the remounts are tried */
+	create_and_enter_userns();
+	if (unshare(CLONE_NEWNS) != 0)
+		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0)
+		die("remount of /tmp failed: %s\n", strerror(errno));
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0)
+		die("remount of /tmp with invalid flags "
+		    "succeeded unexpectedly\n");
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Mount ramfs with @mount_flags and remount it with the same flags;
+ * the "invalid" remount is attempted with no extra flags (0).
+ */
+static bool test_unpriv_remount_simple(int mount_flags)
+{
+	const int invalid_flags = 0;
+
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+				   invalid_flags);
+}
+
+/*
+ * Mount ramfs with @mount_flags, remount it with the same flags, then
+ * verify that a remount using @invalid_flags is refused.
+ */
+static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+{
+	const int remount_flags = mount_flags;
+
+	return test_unpriv_remount("ramfs", NULL, mount_flags, remount_flags,
+				   invalid_flags);
+}
+
+/*
+ * Bind-mount /dev onto /tmp inside a fresh user + mount namespace, remount
+ * it with the flags /dev originally had, and verify that the flags read
+ * back from /tmp are unchanged.
+ *
+ * Runs in a forked child; the parent returns true when the child exits
+ * with EXIT_SUCCESS.
+ */
+static bool test_priv_mount_unpriv_remount(void)
+{
+	const char *orig_path = "/dev";
+	const char *dest_path = "/tmp";
+	int orig_mnt_flags, remount_mnt_flags;
+	pid_t child = fork();
+
+	if (child == -1)
+		die("fork failed: %s\n", strerror(errno));
+
+	if (child != 0) { /* parent */
+		int status;
+		pid_t reaped = waitpid(child, &status, 0);
+
+		if (reaped == -1)
+			die("waitpid failed: %s\n", strerror(errno));
+		if (reaped != child)
+			die("waited for %d got %d\n", child, reaped);
+		if (!WIFEXITED(status))
+			die("child did not terminate cleanly\n");
+		return WEXITSTATUS(status) == EXIT_SUCCESS;
+	}
+
+	/* Sample the flags before entering the new namespaces. */
+	orig_mnt_flags = read_mnt_flags(orig_path);
+
+	create_and_enter_userns();
+	if (unshare(CLONE_NEWNS) != 0)
+		die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
+
+	if (mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL) != 0)
+		die("recursive bind mount of %s onto %s failed: %s\n",
+			orig_path, dest_path, strerror(errno));
+
+	if (mount(dest_path, dest_path, "none",
+		  MS_REMOUNT | MS_BIND | orig_mnt_flags, NULL) != 0)
+		die("remount of /tmp failed: %s\n", strerror(errno));
+
+	remount_mnt_flags = read_mnt_flags(dest_path);
+	if (remount_mnt_flags != orig_mnt_flags)
+		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+			dest_path, orig_path);
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Run every remount scenario in turn.  The first scenario that fails
+ * terminates the program through die() with a message naming the flag
+ * combination; EXIT_SUCCESS is returned when all of them pass.
+ */
+int main(int argc, char **argv)
+{
+	if (!test_unpriv_remount_simple(MS_RDONLY)) {
+		die("MS_RDONLY malfunctions\n");
+	}
+	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+		die("MS_NODEV malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOSUID)) {
+		die("MS_NOSUID malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+		die("MS_NOEXEC malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME,
+				       MS_NOATIME)) {
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME,
+				       MS_NOATIME)) {
+		die("MS_STRICTATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME,
+				       MS_STRICTATIME)) {
+		die("MS_NOATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+				       MS_NOATIME)) {
+		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+				       MS_NOATIME)) {
+		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+				       MS_STRICTATIME)) {
+		/* Name the flags that were actually tested. */
+		die("MS_NOATIME|MS_NODIRATIME malfunctions\n");
+	}
+	/* Mounted with MS_STRICTATIME, remounted without any atime flag. */
+	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) {
+		die("Default atime malfunctions\n");
+	}
+	if (!test_priv_mount_unpriv_remount()) {
+		die("Mount flags unexpectedly changed after remount\n");
+	}
+	return EXIT_SUCCESS;
+}
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@ -0,0 +1,10 @@
+# Makefile for POSIX message queue selftests.
+
+all:
+	gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
+	gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+
+run_tests:
+	@./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
+	@./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
+
+clean:
+	rm -f mq_open_tests mq_perf_tests
+
+# None of these targets produce a file of the same name.
+.PHONY: all run_tests clean
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@ -0,0 +1,500 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ *	Doug Ledford <dledford@redhat.com>
+ *
+ * mq_open_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_open_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_open_tests.c
+ *   Tests the various situations that should either succeed or fail to
+ *   open a posix message queue and then reports whether or not they
+ *   did as they were supposed to.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <error.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+
+static char *usage =
+"Usage:\n"
+"  %s path\n"
+"\n"
+"	path	Path name of the message queue to create\n"
+"\n"
+"	Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+/* /proc paths of the mqueue tunables this test reads and modifies */
+char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default";
+char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default";
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+/* Non-zero when main() managed to open the "default" tunables */
+int default_settings;
+/* RLIMIT_MSGQUEUE as found at startup, and as currently set by the test */
+struct rlimit saved_limits, cur_limits;
+/* Tunable values read at startup; shutdown() writes non-zero ones back */
+int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
+/* Tunable values the test believes are currently in effect */
+int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
+/* Streams opened on the four /proc files above */
+FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
+/* Name of the queue under test, and its descriptor (-1 when not open) */
+char *queue_path;
+mqd_t queue = -1;
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+void validate_current_settings();
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result);
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result);
+
+/*
+ * __set - write @value to a /proc tunable without verifying the result.
+ * @stream:  stream opened on the tunable.
+ * @value:   integer to store.
+ * @err_msg: prefix handed to perror() if the write fails.
+ *
+ * Used on the shutdown path, so failures are only reported, never fatal.
+ * The stream is flushed explicitly: stdio buffers the fprintf(), so
+ * without the flush a failing write would never be noticed here.
+ */
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0 || fflush(stream) != 0)
+		perror(err_msg);
+}
+
+
+/*
+ * shutdown - undo the test's system changes and terminate.
+ * @exit_val:  exit status; non-zero also prints a diagnostic.
+ * @err_cause: description of what failed (used when @exit_val != 0).
+ * @line_no:   source line reported in the diagnostic.
+ *
+ * Regains euid 0, closes and unlinks the test queue, and writes the saved
+ * /proc tunables back.  errno is captured on entry because the cleanup
+ * calls below (notably mq_unlink() on an already removed queue) overwrite
+ * it before the diagnostic is printed.
+ */
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+	static int in_shutdown = 0;
+	int saved_errno = errno;
+
+	/* Guard against being re-entered while cleanup is in progress */
+	if (in_shutdown++)
+		return;
+
+	if (seteuid(0) == -1)
+		perror("seteuid() failed");
+
+	if (queue != -1)
+		if (mq_close(queue))
+			perror("mq_close() during shutdown");
+	if (queue_path)
+		/*
+		 * Be silent if this fails, if we cleaned up already it's
+		 * expected to fail
+		 */
+		mq_unlink(queue_path);
+	if (default_settings) {
+		if (saved_def_msgs)
+			__set(def_msgs, saved_def_msgs,
+			      "failed to restore saved_def_msgs");
+		if (saved_def_msgsize)
+			__set(def_msgsize, saved_def_msgsize,
+			      "failed to restore saved_def_msgsize");
+	}
+	if (saved_max_msgs)
+		__set(max_msgs, saved_max_msgs,
+		      "failed to restore saved_max_msgs");
+	if (saved_max_msgsize)
+		__set(max_msgsize, saved_max_msgsize,
+		      "failed to restore saved_max_msgsize");
+	if (exit_val)
+		error(exit_val, saved_errno, "%s at %d", err_cause, line_no);
+	exit(0);
+}
+
+/*
+ * Read the integer stored in a /proc tunable from the start of @stream.
+ * A value that cannot be parsed ends the test through shutdown().
+ */
+static inline int get(FILE *stream)
+{
+	int parsed;
+
+	rewind(stream);
+	if (fscanf(stream, "%d", &parsed) != 1)
+		shutdown(4, "Error reading /proc entry", __LINE__ - 1);
+
+	return parsed;
+}
+
+/*
+ * set - write @value to a /proc tunable and verify it by reading it back.
+ * @stream: stream opened on the tunable.
+ * @value:  integer to store.
+ *
+ * A failed write, or reading back a different value, ends the test through
+ * shutdown().  shutdown() is called as a plain statement: returning its
+ * (void) result from this void function is not valid ISO C.  The line
+ * offsets make the reported line the one holding the failing check.
+ */
+static inline void set(FILE *stream, int value)
+{
+	int new_value;
+
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0) {
+		shutdown(5, "Failed writing to /proc file", __LINE__ - 1);
+		return;
+	}
+	new_value = get(stream);
+	if (new_value != value)
+		shutdown(5, "We didn't get what we wrote to /proc back",
+			 __LINE__ - 2);
+}
+
+/* Fetch resource limit @type into @rlim; failure ends the test. */
+static inline void getr(int type, struct rlimit *rlim)
+{
+	if (getrlimit(type, rlim) != 0)
+		shutdown(6, "getrlimit()", __LINE__ - 1);
+}
+
+/* Install @rlim as resource limit @type; failure ends the test. */
+static inline void setr(int type, struct rlimit *rlim)
+{
+	if (setrlimit(type, rlim) != 0)
+		shutdown(7, "setrlimit()", __LINE__ - 1);
+}
+
+/*
+ * validate_current_settings - make the starting state usable for the tests.
+ *
+ * Raises RLIMIT_MSGQUEUE when the soft limit is below 4096 bytes, then
+ * lowers the default (or, without default knobs, the maximum) queue
+ * parameters to 10 messages of 128 bytes when a queue created with the
+ * current values would not fit under the soft limit.
+ *
+ * The space estimate is held in an rlim_t: the product can exceed INT_MAX
+ * for large msg_max/msgsize_max values, and it is compared against
+ * rlim_cur, which is itself an rlim_t.
+ */
+void validate_current_settings()
+{
+	rlim_t rlim_needed;
+
+	if (cur_limits.rlim_cur < 4096) {
+		printf("Current rlimit value for POSIX message queue bytes is "
+		       "unreasonably low,\nincreasing.\n\n");
+		cur_limits.rlim_cur = 8192;
+		cur_limits.rlim_max = 16384;
+		setr(RLIMIT_MSGQUEUE, &cur_limits);
+	}
+
+	if (default_settings) {
+		rlim_needed = (rlim_t)(cur_def_msgs + 1) *
+			(cur_def_msgsize + 1 + 2 * sizeof(void *));
+		if (rlim_needed > cur_limits.rlim_cur) {
+			printf("Temporarily lowering default queue parameters "
+			       "to something that will work\n"
+			       "with the current rlimit values.\n\n");
+			set(def_msgs, 10);
+			cur_def_msgs = 10;
+			set(def_msgsize, 128);
+			cur_def_msgsize = 128;
+		}
+	} else {
+		rlim_needed = (rlim_t)(cur_max_msgs + 1) *
+			(cur_max_msgsize + 1 + 2 * sizeof(void *));
+		if (rlim_needed > cur_limits.rlim_cur) {
+			printf("Temporarily lowering maximum queue parameters "
+			       "to something that will work\n"
+			       "with the current rlimit values in case this is "
+			       "a kernel that ties the default\n"
+			       "queue parameters to the maximum queue "
+			       "parameters.\n\n");
+			set(max_msgs, 10);
+			cur_max_msgs = 10;
+			set(max_msgsize, 128);
+			cur_max_msgsize = 128;
+		}
+	}
+}
+
+/*
+ * test_queue - create, inspect and remove the test queue; any failure is
+ * fatal.  Only to be used where queue creation is expected to succeed.
+ * The attributes the kernel assigned are returned in *result, and the
+ * queue is closed and unlinked again before returning.
+ */
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result)
+{
+	const int oflag = O_RDWR | O_EXCL | O_CREAT;
+	const int mode = DEFFILEMODE;
+
+	queue = mq_open(queue_path, oflag, mode, attr);
+	if (queue == -1)
+		shutdown(1, "mq_open()", __LINE__);
+
+	if (mq_getattr(queue, result) != 0)
+		shutdown(1, "mq_getattr()", __LINE__);
+
+	if (mq_close(queue) != 0)
+		shutdown(1, "mq_close()", __LINE__);
+	queue = -1;
+
+	if (mq_unlink(queue_path) != 0)
+		shutdown(1, "mq_unlink()", __LINE__);
+}
+
+/*
+ * test_queue_fail - like test_queue, except that failing to create the
+ * queue is reported to the caller instead of ending the test.
+ * Returns:
+ * 0 - the queue could not be created
+ * 1 - the queue was created; its attributes are in *result
+ */
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result)
+{
+	const int oflag = O_RDWR | O_EXCL | O_CREAT;
+	const int mode = DEFFILEMODE;
+
+	queue = mq_open(queue_path, oflag, mode, attr);
+	if (queue == -1)
+		return 0;
+
+	if (mq_getattr(queue, result) != 0)
+		shutdown(1, "mq_getattr()", __LINE__);
+
+	if (mq_close(queue) != 0)
+		shutdown(1, "mq_close()", __LINE__);
+	queue = -1;
+
+	if (mq_unlink(queue_path) != 0)
+		shutdown(1, "mq_unlink()", __LINE__);
+
+	return 1;
+}
+
+/*
+ * main - drive the mq_open() test series.
+ * @argv[1]: name of the message queue to create (a leading '/' is added
+ *           when missing).
+ *
+ * Must run as root.  Opens the mqueue /proc tunables, records and prints
+ * the initial state, adjusts it so the tests can run, then executes test
+ * series 1 (mq_open() without attributes) and series 2 (mq_open() with
+ * attributes, first as euid 0, then as euid 99).  Results are printed as
+ * PASS/FAIL lines; shutdown() restores the saved tunables on the way out.
+ */
+int main(int argc, char *argv[])
+{
+	struct mq_attr attr, result;
+
+	if (argc != 2) {
+		fprintf(stderr, "Must pass a valid queue name\n\n");
+		fprintf(stderr, usage, argv[0]);
+		exit(1);
+	}
+
+	/*
+	 * Although we can create a msg queue with a non-absolute path name,
+	 * unlink will fail.  So, if the name doesn't start with a /, add one
+	 * when we save it.
+	 */
+	if (*argv[1] == '/') {
+		queue_path = strdup(argv[1]);
+		if (!queue_path) {
+			perror("strdup()");
+			exit(1);
+		}
+	} else {
+		queue_path = malloc(strlen(argv[1]) + 2);
+		if (!queue_path) {
+			perror("malloc()");
+			exit(1);
+		}
+		queue_path[0] = '/';
+		queue_path[1] = 0;
+		strcat(queue_path, argv[1]);
+	}
+
+	if (getuid() != 0) {
+		fprintf(stderr, "Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+		exit(1);
+	}
+
+	/* Find out what files there are for us to make tweaks in */
+	def_msgs = fopen(DEF_MSGS, "r+");
+	def_msgsize = fopen(DEF_MSGSIZE, "r+");
+	max_msgs = fopen(MAX_MSGS, "r+");
+	max_msgsize = fopen(MAX_MSGSIZE, "r+");
+
+	if (!max_msgs)
+		shutdown(2, "Failed to open msg_max", __LINE__);
+	if (!max_msgsize)
+		shutdown(2, "Failed to open msgsize_max", __LINE__);
+	/*
+	 * Both default knobs are read and written below, so both streams
+	 * must be usable before the default-settings code paths are taken.
+	 */
+	if (def_msgs && def_msgsize)
+		default_settings = 1;
+
+	/* Load up the current system values for everything we can */
+	getr(RLIMIT_MSGQUEUE, &saved_limits);
+	cur_limits = saved_limits;
+	if (default_settings) {
+		saved_def_msgs = cur_def_msgs = get(def_msgs);
+		saved_def_msgsize = cur_def_msgsize = get(def_msgsize);
+	}
+	saved_max_msgs = cur_max_msgs = get(max_msgs);
+	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+
+	/* Tell the user our initial state */
+	printf("\nInitial system state:\n");
+	printf("\tUsing queue path:\t\t%s\n", queue_path);
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n",
+		(long) saved_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n",
+		(long) saved_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs);
+	if (default_settings) {
+		printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize);
+		printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs);
+	} else {
+		printf("\tDefault Message Size:\t\tNot Supported\n");
+		printf("\tDefault Queue Size:\t\tNot Supported\n");
+	}
+	printf("\n");
+
+	validate_current_settings();
+
+	printf("Adjusted system state for testing:\n");
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n", (long) cur_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n", (long) cur_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs);
+	if (default_settings) {
+		printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize);
+		printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs);
+	}
+
+	printf("\n\nTest series 1, behavior when no attr struct "
+	       "passed to mq_open:\n");
+	if (!default_settings) {
+		test_queue(NULL, &result);
+		printf("Given sane system settings, mq_open without an attr "
+		       "struct succeeds:\tPASS\n");
+		if (result.mq_maxmsg != cur_max_msgs ||
+		    result.mq_msgsize != cur_max_msgsize) {
+			printf("Kernel does not support setting the default "
+			       "mq attributes,\nbut also doesn't tie the "
+			       "defaults to the maximums:\t\t\tPASS\n");
+		} else {
+			set(max_msgs, ++cur_max_msgs);
+			set(max_msgsize, ++cur_max_msgsize);
+			test_queue(NULL, &result);
+			if (result.mq_maxmsg == cur_max_msgs &&
+			    result.mq_msgsize == cur_max_msgsize)
+				printf("Kernel does not support setting the "
+				       "default mq attributes and\n"
+				       "also ties system wide defaults to "
+				       "the system wide maximums:\t\t"
+				       "FAIL\n");
+			else
+				printf("Kernel does not support setting the "
+				       "default mq attributes,\n"
+				       "but also doesn't tie the defaults to "
+				       "the maximums:\t\t\tPASS\n");
+		}
+	} else {
+		printf("Kernel supports setting defaults separately from "
+		       "maximums:\t\tPASS\n");
+		/*
+		 * While we are here, go ahead and test that the kernel
+		 * properly follows the default settings
+		 */
+		test_queue(NULL, &result);
+		printf("Given sane values, mq_open without an attr struct "
+		       "succeeds:\t\tPASS\n");
+		if (result.mq_maxmsg != cur_def_msgs ||
+		    result.mq_msgsize != cur_def_msgsize)
+			printf("Kernel supports setting defaults, but does "
+			       "not actually honor them:\tFAIL\n\n");
+		else {
+			set(def_msgs, ++cur_def_msgs);
+			set(def_msgsize, ++cur_def_msgsize);
+			/* In case max was the same as the default */
+			set(max_msgs, ++cur_max_msgs);
+			set(max_msgsize, ++cur_max_msgsize);
+			test_queue(NULL, &result);
+			if (result.mq_maxmsg != cur_def_msgs ||
+			    result.mq_msgsize != cur_def_msgsize)
+				printf("Kernel supports setting defaults, but "
+				       "does not actually honor them:\t"
+				       "FAIL\n");
+			else
+				printf("Kernel properly honors default setting "
+				       "knobs:\t\t\t\tPASS\n");
+		}
+		/* Push the defaults above the maximums */
+		set(def_msgs, cur_max_msgs + 1);
+		cur_def_msgs = cur_max_msgs + 1;
+		set(def_msgsize, cur_max_msgsize + 1);
+		cur_def_msgsize = cur_max_msgsize + 1;
+		if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >=
+		    cur_limits.rlim_cur) {
+			cur_limits.rlim_cur = (cur_def_msgs + 2) *
+				(cur_def_msgsize + 2 * sizeof(void *));
+			cur_limits.rlim_max = 2 * cur_limits.rlim_cur;
+			setr(RLIMIT_MSGQUEUE, &cur_limits);
+		}
+		if (test_queue_fail(NULL, &result)) {
+			if (result.mq_maxmsg == cur_max_msgs &&
+			    result.mq_msgsize == cur_max_msgsize)
+				printf("Kernel properly limits default values "
+				       "to lesser of default/max:\t\tPASS\n");
+			else
+				printf("Kernel does not properly set default "
+				       "queue parameters when\ndefaults > "
+				       "max:\t\t\t\t\t\t\t\tFAIL\n");
+		} else
+			printf("Kernel fails to open mq because defaults are "
+			       "greater than maximums:\tFAIL\n");
+		set(def_msgs, --cur_def_msgs);
+		set(def_msgsize, --cur_def_msgsize);
+		cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs *
+			cur_def_msgsize;
+		setr(RLIMIT_MSGQUEUE, &cur_limits);
+		if (test_queue_fail(NULL, &result))
+			printf("Kernel creates queue even though defaults "
+			       "would exceed\nrlimit setting:"
+			       "\t\t\t\t\t\t\t\tFAIL\n");
+		else
+			printf("Kernel properly fails to create queue when "
+			       "defaults would\nexceed rlimit:"
+			       "\t\t\t\t\t\t\t\tPASS\n");
+	}
+
+	/*
+	 * Test #2 - open with an attr struct that exceeds rlimit
+	 */
+	printf("\n\nTest series 2, behavior when attr struct is "
+	       "passed to mq_open:\n");
+	cur_max_msgs = 32;
+	cur_max_msgsize = cur_limits.rlim_max >> 4;
+	set(max_msgs, cur_max_msgs);
+	set(max_msgsize, cur_max_msgsize);
+	attr.mq_maxmsg = cur_max_msgs;
+	attr.mq_msgsize = cur_max_msgsize;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open in excess of rlimit max when euid = 0 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open in excess of rlimit max when euid = 0 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = cur_max_msgs + 1;
+	attr.mq_msgsize = 10;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_maxmsg > limit when euid = 0 "
+		       "succeeded:\t\tPASS\n");
+	else
+		printf("Queue open with mq_maxmsg > limit when euid = 0 "
+		       "failed:\t\tFAIL\n");
+	attr.mq_maxmsg = 1;
+	attr.mq_msgsize = cur_max_msgsize + 1;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_msgsize > limit when euid = 0 "
+		       "succeeded:\t\tPASS\n");
+	else
+		printf("Queue open with mq_msgsize > limit when euid = 0 "
+		       "failed:\t\tFAIL\n");
+	attr.mq_maxmsg = 65536;
+	attr.mq_msgsize = 65536;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with total size > 2GB when euid = 0 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with total size > 2GB when euid = 0 "
+		       "failed:\t\t\tPASS\n");
+
+	/*
+	 * The system tunables have been modified by now, so leave through
+	 * shutdown() rather than exit() to get them restored.
+	 */
+	if (seteuid(99) == -1)
+		shutdown(1, "seteuid() failed", __LINE__ - 1);
+
+	attr.mq_maxmsg = cur_max_msgs;
+	attr.mq_msgsize = cur_max_msgsize;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open in excess of rlimit max when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open in excess of rlimit max when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = cur_max_msgs + 1;
+	attr.mq_msgsize = 10;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_maxmsg > limit when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with mq_maxmsg > limit when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = 1;
+	attr.mq_msgsize = cur_max_msgsize + 1;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_msgsize > limit when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with mq_msgsize > limit when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = 65536;
+	attr.mq_msgsize = 65536;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with total size > 2GB when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with total size > 2GB when euid = 99 "
+		       "failed:\t\t\tPASS\n");
+
+	shutdown(0,"",0);
+}
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@ -0,0 +1,743 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ *	Doug Ledford <dledford@redhat.com>
+ *
+ * mq_perf_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_perf_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_perf_tests.c
+ *   Tests various types of message queue workloads, concentrating on those
+ *   situations that involve large message sizes, large message queue depths,
+ *   or both, and reports back useful metrics about kernel message queue
+ *   performance.
+ *
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <popt.h>
+
+/*
+ * Hand-written usage text.  Nothing else in this file references it; the
+ * help printed by main() is generated by popt from the options[] table.
+ * NOTE(review): this text describes "path" as a positional argument, while
+ * options[] takes the queue name through -p/--path -- confirm which is meant.
+ */
+static char *usage =
+"Usage:\n"
+"  %s [-c #[,#..] -f] path\n"
+"\n"
+"	-c #	Skip most tests and go straight to a high queue depth test\n"
+"		and then run that test continuously (useful for running at\n"
+"		the same time as some other workload to see how much the\n"
+"		cache thrashing caused by adding messages to a very deep\n"
+"		queue impacts the performance of other programs).  The number\n"
+"		indicates which CPU core we should bind the process to during\n"
+"		the run.  If you have more than one physical CPU, then you\n"
+"		will need one copy per physical CPU package, and you should\n"
+"		specify the CPU cores to pin ourself to via a comma separated\n"
+"		list of CPU values.\n"
+"	-f	Only usable with continuous mode.  Pin ourself to the CPUs\n"
+"		as requested, then instead of looping doing a high mq\n"
+"		workload, just busy loop.  This will allow us to lock up a\n"
+"		single CPU just like we normally would, but without actually\n"
+"		thrashing the CPU cache.  This is to make it easier to get\n"
+"		comparable numbers from some other workload running on the\n"
+"		other CPUs.  One set of numbers with # CPUs locked up running\n"
+"		an mq workload, and another set of numbers with those same\n"
+"		CPUs locked away from the test workload, but not doing\n"
+"		anything to trash the cache like the mq workload might.\n"
+"	path	Path name of the message queue to create\n"
+"\n"
+"	Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+/* /proc files holding the system-wide mqueue limits this program adjusts */
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+/* Upper bound on the number of CPUs that can be pinned (size of the arrays) */
+#define MAX_CPUS 64
+/* Raw argument of -c as stored by popt, and the CPU list parsed from it */
+char *cpu_option_string;
+int cpus_to_pin[MAX_CPUS];
+int num_cpus_to_pin;
+/* One worker thread per pinned CPU; main_thread lets handlers tell them apart */
+pthread_t cpu_threads[MAX_CPUS];
+pthread_t main_thread;
+/* Dynamically sized CPU mask (CPU_ALLOC) reused for every affinity setup */
+cpu_set_t *cpu_set;
+int cpu_set_size;
+int cpus_online;
+
+/* Size in bytes of every message sent, and iteration counts of the tests */
+#define MSG_SIZE 16
+#define TEST1_LOOPS 10000000
+#define TEST2_LOOPS 100000
+/* Set by -c and -f respectively */
+int continuous_mode;
+int continuous_mode_fake;
+
+/*
+ * saved_* hold the values read at startup (restored by shutdown()), cur_*
+ * the values currently in effect after increase_limits().
+ */
+struct rlimit saved_limits, cur_limits;
+int saved_max_msgs, saved_max_msgsize;
+int cur_max_msgs, cur_max_msgsize;
+/* Open "r+" streams on MAX_MSGS and MAX_MSGSIZE */
+FILE *max_msgs, *max_msgsize;
+int cur_nice;
+/* Name of the test queue; may be replaced by -p */
+char *queue_path = "/mq_perf_tests";
+/* Descriptor of the test queue, -1 while not open */
+mqd_t queue = -1;
+/* Attributes reported by mq_getattr() for the open queue */
+struct mq_attr result;
+/* Value of sysconf(_SC_MQ_PRIO_MAX), used by the priority generators */
+int mq_prio_max;
+
+/*
+ * popt option table.  -c and -p return their .val ('c' / 'p') from
+ * poptGetNextOpt() so main() can post-process the stored string; -f has
+ * .val == 0 and therefore only sets continuous_mode_fake.
+ */
+const struct poptOption options[] = {
+	{
+		.longName = "continuous",
+		.shortName = 'c',
+		.argInfo = POPT_ARG_STRING,
+		.arg = &cpu_option_string,
+		.val = 'c',
+		.descrip = "Run continuous tests at a high queue depth in "
+			"order to test the effects of cache thrashing on "
+			"other tasks on the system.  This test is intended "
+			"to be run on one core of each physical CPU while "
+			"some other CPU intensive task is run on all the other "
+			"cores of that same physical CPU and the other task "
+			"is timed.  It is assumed that the process of adding "
+			"messages to the message queue in a tight loop will "
+			"impact that other task to some degree.  Once the "
+			"tests are performed in this way, you should then "
+			"re-run the tests using fake mode in order to check "
+			"the difference in time required to perform the CPU "
+			"intensive task",
+		.argDescrip = "cpu[,cpu]",
+	},
+	{
+		.longName = "fake",
+		.shortName = 'f',
+		.argInfo = POPT_ARG_NONE,
+		.arg = &continuous_mode_fake,
+		.val = 0,
+		/* trailing space added: the pieces used to join as "incontinuous" */
+		.descrip = "Tie up the CPUs that we would normally tie up in "
+			"continuous mode, but don't actually do any mq stuff, "
+			"just keep the CPU busy so it can't be used to process "
+			"system level tasks as this would free up resources on "
+			"the other CPU cores and skew the comparison between "
+			"the no-mqueue work and mqueue work tests",
+		.argDescrip = NULL,
+	},
+	{
+		.longName = "path",
+		.shortName = 'p',
+		.argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
+		.arg = &queue_path,
+		.val = 'p',
+		.descrip = "The name of the path to use in the mqueue "
+			"filesystem for our tests",
+		.argDescrip = "pathname",
+	},
+	POPT_AUTOHELP
+	POPT_TABLEEND
+};
+
+/* Forward declarations: the helpers below call each other (mostly shutdown()) */
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
+void sig_action(int signum, siginfo_t *info, void *context);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline int try_set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+static inline void open_queue(struct mq_attr *attr);
+void increase_limits(void);
+
+/*
+ * Write @value at the start of @stream.  Unlike set() this never calls
+ * shutdown(): a failed write is only reported through perror(@err_msg).
+ */
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+	int written;
+
+	rewind(stream);
+	written = fprintf(stream, "%d", value);
+	if (written < 0)
+		perror(err_msg);
+}
+
+
+/*
+ * shutdown - stop the worker threads, undo our system changes and exit
+ * @exit_val:  exit status; non-zero also prints an error message
+ * @err_cause: text describing what failed (used only when exit_val != 0)
+ * @line_no:   source line reported alongside err_cause
+ *
+ * Only the first caller does any work; a nested or concurrent call returns
+ * immediately to its caller.
+ * NOTE(review): error() is declared in <error.h>, which is not among the
+ * headers this file includes.
+ */
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+	static int in_shutdown = 0;
+	/* Captured first: the cleanup calls below may overwrite errno */
+	int errno_at_shutdown = errno;
+	int i;
+
+	/* In case we get called by multiple threads or from an sighandler */
+	if (in_shutdown++)
+		return;
+
+	/*
+	 * SIGUSR1 is handled by sig_action_SIGUSR1(), which calls
+	 * pthread_exit() when it runs on a thread other than main_thread.
+	 */
+	for (i = 0; i < num_cpus_to_pin; i++)
+		if (cpu_threads[i]) {
+			pthread_kill(cpu_threads[i], SIGUSR1);
+			pthread_join(cpu_threads[i], NULL);
+		}
+
+	if (queue != -1)
+		if (mq_close(queue))
+			perror("mq_close() during shutdown");
+	if (queue_path)
+		/*
+		 * Be silent if this fails, if we cleaned up already it's
+		 * expected to fail
+		 */
+		mq_unlink(queue_path);
+	/* The saved values are still 0 if we never got as far as reading them */
+	if (saved_max_msgs)
+		__set(max_msgs, saved_max_msgs,
+		      "failed to restore saved_max_msgs");
+	if (saved_max_msgsize)
+		__set(max_msgsize, saved_max_msgsize,
+		      "failed to restore saved_max_msgsize");
+	/* glibc error() with a non-zero status prints the message and exits */
+	if (exit_val)
+		error(exit_val, errno_at_shutdown, "%s at %d",
+		      err_cause, line_no);
+	exit(0);
+}
+
+/*
+ * SIGUSR1 handler.  Worker threads simply terminate themselves (this is how
+ * shutdown() stops them); on the main thread the signal starts a shutdown.
+ */
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
+{
+	/* pthread_exit() does not return, so no else branch is needed */
+	if (!pthread_equal(pthread_self(), main_thread))
+		pthread_exit(0);
+
+	fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
+			"exiting\n", signum);
+	shutdown(0, "", 0);
+	fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+	exit(0);
+}
+
+/*
+ * Handler for SIGHUP/SIGINT/SIGQUIT/SIGTERM.  A worker thread forwards the
+ * signal to the main thread; the main thread performs the shutdown.
+ */
+void sig_action(int signum, siginfo_t *info, void *context)
+{
+	if (!pthread_equal(pthread_self(), main_thread)) {
+		pthread_kill(main_thread, signum);
+		return;
+	}
+
+	fprintf(stderr, "Caught signal %d, exiting\n", signum);
+	shutdown(0, "", 0);
+	fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+	exit(0);
+}
+
+/*
+ * Rewind @stream and parse one decimal integer from it.  A parse failure
+ * triggers shutdown() with exit status 4.
+ */
+static inline int get(FILE *stream)
+{
+	int parsed;
+	int fields;
+
+	rewind(stream);
+	fields = fscanf(stream, "%d", &parsed);
+	if (fields != 1)
+		shutdown(4, "Error reading /proc entry", __LINE__);
+	return parsed;
+}
+
+/*
+ * Write @value to the /proc file behind @stream and read it back to verify
+ * it was accepted.  Either a failed write or a mismatching read-back
+ * triggers shutdown() with exit status 5.
+ */
+static inline void set(FILE *stream, int value)
+{
+	int new_value;
+
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0) {
+		/*
+		 * shutdown() can return when a shutdown is already running;
+		 * stop here in that case.  (ISO C does not allow
+		 * "return <void expression>;" in a void function.)
+		 */
+		shutdown(5, "Failed writing to /proc file", __LINE__);
+		return;
+	}
+	new_value = get(stream);
+	if (new_value != value)
+		shutdown(5, "We didn't get what we wrote to /proc back",
+			 __LINE__);
+}
+
+/*
+ * Attempt to write @value to @stream and report whether reading the file
+ * back yields the same number.  Returns 1 on a match and 0 otherwise; the
+ * result of the write itself is not examined.
+ */
+static inline int try_set(FILE *stream, int value)
+{
+	rewind(stream);
+	fprintf(stream, "%d", value);
+	return get(stream) == value;
+}
+
+/* getrlimit() wrapper: any failure triggers shutdown() with exit status 6 */
+static inline void getr(int type, struct rlimit *rlim)
+{
+	if (getrlimit(type, rlim) != 0)
+		shutdown(6, "getrlimit()", __LINE__);
+}
+
+/* setrlimit() wrapper: any failure triggers shutdown() with exit status 7 */
+static inline void setr(int type, struct rlimit *rlim)
+{
+	if (setrlimit(type, rlim) != 0)
+		shutdown(7, "setrlimit()", __LINE__);
+}
+
+/**
+ * open_queue - open the global queue for testing
+ * @attr - An attr struct specifying the desired queue traits
+ *
+ * This open is not allowed to fail, failure will result in an orderly
+ * shutdown of the program.  The global queue_path is used to set what
+ * queue to open, the queue descriptor is saved in the global queue
+ * variable, and the traits the queue actually got are stored in the
+ * global result and printed.
+ */
+static inline void open_queue(struct mq_attr *attr)
+{
+	/* O_EXCL: refuse to reuse a queue left behind under the same name */
+	int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
+	int perms = DEFFILEMODE;
+
+	queue = mq_open(queue_path, flags, perms, attr);
+	if (queue == -1)
+		shutdown(1, "mq_open()", __LINE__);
+	if (mq_getattr(queue, &result))
+		shutdown(1, "mq_getattr()", __LINE__);
+	printf("\n\tQueue %s created:\n", queue_path);
+	printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
+	       "O_NONBLOCK" : "(null)");
+	/* struct mq_attr members are (signed) long, hence %ld */
+	printf("\t\tmq_maxmsg:\t\t\t%ld\n", result.mq_maxmsg);
+	printf("\t\tmq_msgsize:\t\t\t%ld\n", result.mq_msgsize);
+	printf("\t\tmq_curmsgs:\t\t\t%ld\n", result.mq_curmsgs);
+}
+
+/*
+ * Worker for fake continuous mode: announce which slot/CPU we are, then
+ * spin forever without touching the message queue.  @arg is unused.
+ */
+void *fake_cont_thread(void *arg)
+{
+	int slot = 0;
+
+	/* Find our own entry in cpu_threads[] to learn our index */
+	while (slot < num_cpus_to_pin && cpu_threads[slot] != pthread_self())
+		slot++;
+	printf("\tStarted fake continuous mode thread %d on CPU %d\n", slot,
+	       cpus_to_pin[slot]);
+	for (;;)
+		;
+}
+
+/*
+ * Worker for continuous mode: announce which slot/CPU we are, then loop
+ * forever sending messages until mq_send() fails (the queue is opened with
+ * O_NONBLOCK, so a full queue makes it fail) and receiving one message
+ * before sending again.  @arg is unused.
+ */
+void *cont_thread(void *arg)
+{
+	char buff[MSG_SIZE];
+	unsigned int priority;	/* mq_receive() takes an unsigned int * */
+	int i;
+
+	/* Send well-defined bytes rather than uninitialized stack contents */
+	memset(buff, 0, sizeof(buff));
+
+	for (i = 0; i < num_cpus_to_pin; i++)
+		if (cpu_threads[i] == pthread_self())
+			break;
+	printf("\tStarted continuous mode thread %d on CPU %d\n", i,
+	       cpus_to_pin[i]);
+	while (1) {
+		while (mq_send(queue, buff, sizeof(buff), 0) == 0)
+			;
+		mq_receive(queue, buff, sizeof(buff), &priority);
+	}
+}
+
+/*
+ * The macros below are expanded inside perf_test_thread() and rely on its
+ * locals (buff, prio_in, prio_out, clock, start, middle, end, nsec,
+ * send_total, recv_total) as well as the global queue.
+ */
+
+/*
+ * Expands to a while header without a body: the statement that follows the
+ * invocation (normally just ';') is the loop body.  Receives until
+ * mq_receive() stops returning a full MSG_SIZE message.
+ */
+#define drain_queue() \
+	while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
+
+/* Send one message at priority prio_out; a failure shuts the program down */
+#define do_untimed_send() \
+	do { \
+		if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+			shutdown(3, "Test send failure", __LINE__); \
+	} while (0)
+
+/*
+ * Send one message and receive one message, timing each half separately and
+ * adding the elapsed nanoseconds to send_total / recv_total.  The carry into
+ * tv_sec is applied at most once per call.
+ */
+#define do_send_recv() \
+	do { \
+		clock_gettime(clock, &start); \
+		if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+			shutdown(3, "Test send failure", __LINE__); \
+		clock_gettime(clock, &middle); \
+		if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
+			shutdown(3, "Test receive failure", __LINE__); \
+		clock_gettime(clock, &end); \
+		nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
+			(middle.tv_nsec - start.tv_nsec); \
+		send_total.tv_nsec += nsec; \
+		if (send_total.tv_nsec >= 1000000000) { \
+			send_total.tv_sec++; \
+			send_total.tv_nsec -= 1000000000; \
+		} \
+		nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
+			(end.tv_nsec - middle.tv_nsec); \
+		recv_total.tv_nsec += nsec; \
+		if (recv_total.tv_nsec >= 1000000000) { \
+			recv_total.tv_sec++; \
+			recv_total.tv_nsec -= 1000000000; \
+		} \
+	} while (0)
+
+/* One variant of test #2: a heading to print and a priority generator */
+struct test {
+	char *desc;		/* heading printed before the variant runs */
+	void (*func)(int *);	/* updates the send priority after each send */
+};
+
+/* Priority generator that leaves *prio untouched */
+void const_prio(int *prio)
+{
+	(void)prio;
+}
+
+/* Priority generator: step *prio up by one, wrapping to 0 at mq_prio_max */
+void inc_prio(int *prio)
+{
+	*prio += 1;
+	if (*prio == mq_prio_max)
+		*prio = 0;
+}
+
+/* Priority generator: step *prio down by one, wrapping to mq_prio_max - 1 */
+void dec_prio(int *prio)
+{
+	*prio -= 1;
+	if (*prio < 0)
+		*prio = mq_prio_max - 1;
+}
+
+/* Priority generator: pick *prio as random() reduced modulo mq_prio_max */
+void random_prio(int *prio)
+{
+	long draw = random();
+
+	*prio = draw % mq_prio_max;
+}
+
+/*
+ * Variants of test #2, run in order by perf_test_thread().  The entry with
+ * a NULL desc terminates the list.
+ */
+struct test test2[] = {
+	{"\n\tTest #2a: Time send/recv message, queue full, constant prio\n",
+		const_prio},
+	{"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n",
+		inc_prio},
+	{"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n",
+		dec_prio},
+	{"\n\tTest #2d: Time send/recv message, queue full, random prio\n",
+		random_prio},
+	{NULL, NULL}
+};
+
+/*
+ * Print the time accumulated in @total as seconds.nanoseconds together with
+ * the average per message over @loops iterations.  @label is "Send"/"Recv".
+ */
+static void report_msg_time(const char *label, const struct timespec *total,
+			    int loops)
+{
+	unsigned long long per_msg;
+
+	/* %09ld: the fraction must be zero padded to nine digits */
+	printf("\t\t%s msg:\t\t\t%ld.%09lds total time\n", label,
+	       (long)total->tv_sec, total->tv_nsec);
+	per_msg = ((unsigned long long)total->tv_sec * 1000000000 +
+		   total->tv_nsec) / loops;
+	printf("\t\t\t\t\t\t%llu nsec/msg\n", per_msg);
+}
+
+/* Finish a "...ing" progress line with the time between @start and @end */
+static void report_elapsed(const struct timespec *start,
+			   const struct timespec *end)
+{
+	unsigned long long elapsed;
+
+	elapsed = ((unsigned long long)(end->tv_sec - start->tv_sec) *
+		   1000000000) + (end->tv_nsec - start->tv_nsec);
+	printf("done.\t\t%llu.%09llus\n", elapsed / 1000000000,
+	       elapsed % 1000000000);
+}
+
+/**
+ * Tests to perform (all done with MSG_SIZE messages):
+ *
+ * 1) Time to add/remove message with 0 messages on queue
+ * 1a) with constant prio
+ * 2) Time to add/remove message when queue close to capacity:
+ * 2a) with constant prio
+ * 2b) with increasing prio
+ * 2c) with decreasing prio
+ * 2d) with random prio
+ * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX)
+ *
+ * Runs as cpu_threads[0]; @arg is unused.  Returns 0 (NULL) on completion;
+ * any failure goes through shutdown() instead of returning.
+ */
+void *perf_test_thread(void *arg)
+{
+	char buff[MSG_SIZE];
+	int prio_out;
+	unsigned int prio_in;	/* mq_receive() takes an unsigned int * */
+	int i;
+	clockid_t clock;
+	struct timespec res, start, middle, end, send_total, recv_total;
+	unsigned long long nsec;	/* scratch used by do_send_recv() */
+	struct test *cur_test;
+
+	/* Send well-defined bytes rather than uninitialized stack contents */
+	memset(buff, 0, sizeof(buff));
+
+	printf("\n\tStarted mqueue performance test thread on CPU %d\n",
+	       cpus_to_pin[0]);
+	mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
+	if (mq_prio_max == -1)
+		shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
+	/* All timings use this thread's CPU-time clock */
+	if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
+		shutdown(2, "pthread_getcpuclockid", __LINE__);
+
+	if (clock_getres(clock, &res))
+		shutdown(2, "clock_getres()", __LINE__);
+
+	printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
+	printf("\t\tClock resolution:\t\t%ld nsec%s\n", res.tv_nsec,
+	       res.tv_nsec > 1 ? "s" : "");
+
+
+
+	printf("\n\tTest #1: Time send/recv message, queue empty\n");
+	printf("\t\t(%d iterations)\n", TEST1_LOOPS);
+	prio_out = 0;
+	send_total.tv_sec = 0;
+	send_total.tv_nsec = 0;
+	recv_total.tv_sec = 0;
+	recv_total.tv_nsec = 0;
+	for (i = 0; i < TEST1_LOOPS; i++)
+		do_send_recv();
+	report_msg_time("Send", &send_total, TEST1_LOOPS);
+	report_msg_time("Recv", &recv_total, TEST1_LOOPS);
+
+
+	for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
+		printf("%s:\n", cur_test->desc);
+		printf("\t\t(%d iterations)\n", TEST2_LOOPS);
+		prio_out = 0;
+		send_total.tv_sec = 0;
+		send_total.tv_nsec = 0;
+		recv_total.tv_sec = 0;
+		recv_total.tv_nsec = 0;
+		printf("\t\tFilling queue...");
+		fflush(stdout);
+		clock_gettime(clock, &start);
+		/* Leave one free slot so the timed send below can succeed */
+		for (i = 0; i < result.mq_maxmsg - 1; i++) {
+			do_untimed_send();
+			cur_test->func(&prio_out);
+		}
+		clock_gettime(clock, &end);
+		report_elapsed(&start, &end);
+		printf("\t\tTesting...");
+		fflush(stdout);
+		for (i = 0; i < TEST2_LOOPS; i++) {
+			do_send_recv();
+			cur_test->func(&prio_out);
+		}
+		printf("done.\n");
+		report_msg_time("Send", &send_total, TEST2_LOOPS);
+		report_msg_time("Recv", &recv_total, TEST2_LOOPS);
+		printf("\t\tDraining queue...");
+		fflush(stdout);
+		clock_gettime(clock, &start);
+		drain_queue();
+		clock_gettime(clock, &end);
+		report_elapsed(&start, &end);
+	}
+	return 0;
+}
+
+/*
+ * Raise every limit we can: RLIMIT_MSGQUEUE to unlimited, msg_max in steps
+ * of 10 and msgsize_max in steps of 1024 until the kernel stops accepting
+ * the new value, and the nice value to -20.
+ */
+void increase_limits(void)
+{
+	cur_limits.rlim_cur = RLIM_INFINITY;
+	cur_limits.rlim_max = RLIM_INFINITY;
+	setr(RLIMIT_MSGQUEUE, &cur_limits);
+
+	/* Keep bumping until a write is rejected, then read the real value */
+	for (;;) {
+		cur_max_msgs += 10;
+		if (!try_set(max_msgs, cur_max_msgs))
+			break;
+	}
+	cur_max_msgs = get(max_msgs);
+
+	for (;;) {
+		cur_max_msgsize += 1024;
+		if (!try_set(max_msgsize, cur_max_msgsize))
+			break;
+	}
+	cur_max_msgsize = get(max_msgsize);
+
+	if (setpriority(PRIO_PROCESS, 0, -20) != 0)
+		shutdown(2, "setpriority()", __LINE__);
+	cur_nice = -20;
+}
+
+/*
+ * main - parse the command line, raise the mqueue limits and run the tests
+ *
+ * Without -c a single perf_test_thread() is pinned to the last online CPU
+ * and the program exits when it finishes.  With -c one cont_thread() (or
+ * fake_cont_thread() with -f) is pinned to every requested CPU and the main
+ * thread sleeps until a signal triggers shutdown().
+ */
+int main(int argc, char *argv[])
+{
+	struct mq_attr attr;
+	char *option, *next_option;
+	int i, cpu;
+	struct sigaction sa;
+	poptContext popt_context;
+	/*
+	 * Must be int: poptGetNextOpt() returns negative codes, which a plain
+	 * char cannot represent where char is unsigned.
+	 */
+	int rc;
+	void *retval;
+
+	main_thread = pthread_self();
+	num_cpus_to_pin = 0;
+
+	if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
+		perror("sysconf(_SC_NPROCESSORS_ONLN)");
+		exit(1);
+	}
+	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+	cpu_set = CPU_ALLOC(cpus_online);
+	if (cpu_set == NULL) {
+		perror("CPU_ALLOC()");
+		exit(1);
+	}
+	cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+
+	popt_context = poptGetContext(NULL, argc, (const char **)argv,
+				      options, 0);
+
+	while ((rc = poptGetNextOpt(popt_context)) > 0) {
+		switch (rc) {
+		case 'c':
+			continuous_mode = 1;
+			option = cpu_option_string;
+			do {
+				next_option = strchr(option, ',');
+				if (next_option)
+					*next_option = '\0';
+				cpu = atoi(option);
+				if (cpu < 0)
+					fprintf(stderr, "CPU %d is not a "
+						"valid CPU, ignoring.\n",
+						cpu);
+				else if (cpu >= cpus_online)
+					fprintf(stderr, "CPU %d exceeds "
+						"cpus online, ignoring.\n",
+						cpu);
+				else if (num_cpus_to_pin < MAX_CPUS)
+					/* guard matters when -c is repeated */
+					cpus_to_pin[num_cpus_to_pin++] = cpu;
+				if (next_option)
+					option = ++next_option;
+			} while (next_option && num_cpus_to_pin < MAX_CPUS);
+			/* Double check that they didn't give us the same CPU
+			 * more than once.  Start from an empty set so that a
+			 * repeated -c does not trip over its own earlier
+			 * entries. */
+			CPU_ZERO_S(cpu_set_size, cpu_set);
+			for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
+				if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
+						cpu_set)) {
+					fprintf(stderr, "Any given CPU may "
+						"only be given once.\n");
+					exit(1);
+				} else
+					CPU_SET_S(cpus_to_pin[cpu],
+						  cpu_set_size, cpu_set);
+			}
+			break;
+		case 'p':
+			/*
+			 * Although we can create a msg queue with a
+			 * non-absolute path name, unlink will fail.  So,
+			 * if the name doesn't start with a /, add one
+			 * when we save it.
+			 */
+			option = queue_path;
+			if (*option != '/') {
+				queue_path = malloc(strlen(option) + 2);
+				if (!queue_path) {
+					perror("malloc()");
+					exit(1);
+				}
+				queue_path[0] = '/';
+				queue_path[1] = 0;
+				strcat(queue_path, option);
+				free(option);
+			}
+			break;
+		}
+	}
+
+	/* -1 means "all options consumed"; anything lower is a parse error */
+	if (rc < -1) {
+		fprintf(stderr, "%s: %s\n",
+			poptBadOption(popt_context, POPT_BADOPTION_NOALIAS),
+			poptStrerror(rc));
+		poptPrintUsage(popt_context, stderr, 0);
+		exit(1);
+	}
+
+	if (continuous_mode && num_cpus_to_pin == 0) {
+		fprintf(stderr, "Must pass at least one CPU to continuous "
+			"mode.\n");
+		poptPrintUsage(popt_context, stderr, 0);
+		exit(1);
+	} else if (!continuous_mode) {
+		num_cpus_to_pin = 1;
+		cpus_to_pin[0] = cpus_online - 1;
+	}
+
+	if (getuid() != 0) {
+		fprintf(stderr, "Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+		exit(1);
+	}
+
+	max_msgs = fopen(MAX_MSGS, "r+");
+	max_msgsize = fopen(MAX_MSGSIZE, "r+");
+	if (!max_msgs)
+		shutdown(2, "Failed to open msg_max", __LINE__);
+	if (!max_msgsize)
+		shutdown(2, "Failed to open msgsize_max", __LINE__);
+
+	/* Load up the current system values for everything we can */
+	getr(RLIMIT_MSGQUEUE, &saved_limits);
+	cur_limits = saved_limits;
+	saved_max_msgs = cur_max_msgs = get(max_msgs);
+	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+	/* getpriority() can legitimately return -1, so errno is the test */
+	errno = 0;
+	cur_nice = getpriority(PRIO_PROCESS, 0);
+	if (errno)
+		shutdown(2, "getpriority()", __LINE__);
+
+	/* Tell the user our initial state */
+	printf("\nInitial system state:\n");
+	printf("\tUsing queue path:\t\t\t%s\n", queue_path);
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+		(long) saved_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+		(long) saved_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
+	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+	printf("\n");
+
+	increase_limits();
+
+	printf("Adjusted system state for testing:\n");
+	if (cur_limits.rlim_cur == RLIM_INFINITY) {
+		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
+		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
+	} else {
+		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+		       (long) cur_limits.rlim_cur);
+		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+		       (long) cur_limits.rlim_max);
+	}
+	printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
+	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+	printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
+	       (continuous_mode_fake ? "fake mode" : "enabled") :
+	       "disabled");
+	printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
+	for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
+			printf(",%d", cpus_to_pin[cpu]);
+	printf("\n");
+
+	/* The termination signals are blocked while any handler runs */
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = sig_action_SIGUSR1;
+	sigemptyset(&sa.sa_mask);
+	sigaddset(&sa.sa_mask, SIGHUP);
+	sigaddset(&sa.sa_mask, SIGINT);
+	sigaddset(&sa.sa_mask, SIGQUIT);
+	sigaddset(&sa.sa_mask, SIGTERM);
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGUSR1)", __LINE__);
+	sa.sa_sigaction = sig_action;
+	if (sigaction(SIGHUP, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGHUP)", __LINE__);
+	if (sigaction(SIGINT, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGINT)", __LINE__);
+	if (sigaction(SIGQUIT, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGQUIT)", __LINE__);
+	if (sigaction(SIGTERM, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGTERM)", __LINE__);
+
+	/* Fake mode never touches the queue, so it is not even created */
+	if (!continuous_mode_fake) {
+		attr.mq_flags = O_NONBLOCK;
+		attr.mq_maxmsg = cur_max_msgs;
+		attr.mq_msgsize = MSG_SIZE;
+		open_queue(&attr);
+	}
+	for (i = 0; i < num_cpus_to_pin; i++) {
+		pthread_attr_t thread_attr;
+		void *(*thread_func)(void *);
+
+		if (continuous_mode_fake)
+			thread_func = fake_cont_thread;
+		else if (continuous_mode)
+			thread_func = cont_thread;
+		else
+			thread_func = perf_test_thread;
+
+		/* Reuse cpu_set as a one-CPU mask for this thread */
+		CPU_ZERO_S(cpu_set_size, cpu_set);
+		CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
+		pthread_attr_init(&thread_attr);
+		pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
+					    cpu_set);
+		if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
+				   NULL))
+			shutdown(1, "pthread_create()", __LINE__);
+		pthread_attr_destroy(&thread_attr);
+	}
+
+	if (!continuous_mode) {
+		pthread_join(cpu_threads[0], &retval);
+		shutdown((long)retval, "perf_test_thread()", __LINE__);
+	} else {
+		/* The workers never finish; wait for a signal handler */
+		while (1)
+			sleep(1);
+	}
+	shutdown(0, "", 0);
+	return 0;
+}
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@ -0,0 +1,25 @@
+# Makefile for net selftests
+
+# CROSS_COMPILE is prepended to the compiler name when it is set
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall -O2 -g
+
+# Pick up headers from usr/include at the top of the source tree
+CFLAGS += -I../../../../usr/include/
+
+NET_PROGS = socket psock_fanout psock_tpacket
+
+all: $(NET_PROGS)
+# Each program is built from the single .c file of the same name
+%: %.c
+	$(CC) $(CFLAGS) -o $@ $^
+
+# A failing test script is only reported; a test_bpf module that cannot be
+# loaded makes the target itself fail (exit 1).
+run_tests: all
+	@/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
+	@/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
+	@if /sbin/modprobe test_bpf ; then \
+		/sbin/rmmod test_bpf; \
+		echo "test_bpf: ok"; \
+	else \
+		echo "test_bpf: [FAIL]"; \
+		exit 1; \
+	fi
+clean:
+	$(RM) $(NET_PROGS)
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@ -0,0 +1,312 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn (willemb@google.com)
+ *
+ * A basic test of packet socket fanout behavior.
+ *
+ * Control:
+ * - create fanout fails as expected with illegal flag combinations
+ * - join   fanout fails as expected with diverging types or flags
+ *
+ * Datapath:
+ *   Open a pair of packet sockets and a pair of INET sockets, send a known
+ *   number of packets across the two INET sockets and count the number of
+ *   packets enqueued onto the two packet sockets.
+ *
+ *   The test currently runs for
+ *   - PACKET_FANOUT_HASH
+ *   - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
+ *   - PACKET_FANOUT_LB
+ *   - PACKET_FANOUT_CPU
+ *   - PACKET_FANOUT_ROLLOVER
+ *
+ * Todo:
+ * - functionality: PACKET_FANOUT_FLAG_DEFRAG
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE		/* for sched_setaffinity */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+#define RING_NUM_FRAMES			20
+
+/* Open a packet socket and join fanout group 0 in the given mode.
+ * @typeflags: fanout type, optionally OR'ed with PACKET_FANOUT_FLAG_* bits
+ * @num_packets: unused
+ * @return -1 if the kernel rejects the mode (the socket is closed again),
+ *         a valid socket with the pair_udp filter attached otherwise.
+ * Failing to create or close the socket terminates the program. */
+static int sock_fanout_open(uint16_t typeflags, int num_packets)
+{
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
+	if (fd < 0) {
+		perror("socket packet");
+		exit(1);
+	}
+
+	/* fanout group ID is always 0: tests whether old groups are deleted.
+	 * Shift as unsigned: flags with bit 15 set (e.g. the defrag flag)
+	 * would otherwise overflow a signed int. */
+	val = (int) (((uint32_t) typeflags) << 16);
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+		if (close(fd)) {
+			perror("close packet");
+			exit(1);
+		}
+		return -1;
+	}
+
+	pair_udp_setfilter(fd);
+	return fd;
+}
+
+/* Switch @fd to TPACKET_V2, set up an RX ring of RING_NUM_FRAMES blocks of
+ * one page-sized frame each, and map it.
+ * @return the address of the mapped ring; any failure terminates the
+ *         program. */
+static char *sock_fanout_open_ring(int fd)
+{
+	struct tpacket_req req = {
+		.tp_block_size = getpagesize(),
+		.tp_frame_size = getpagesize(),
+		.tp_block_nr   = RING_NUM_FRAMES,
+		.tp_frame_nr   = RING_NUM_FRAMES,
+	};
+	char *ring;
+	int val = TPACKET_V2;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
+		       sizeof(val))) {
+		perror("packetsock ring setsockopt version");
+		exit(1);
+	}
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
+		       sizeof(req))) {
+		perror("packetsock ring setsockopt");
+		exit(1);
+	}
+
+	ring = mmap(0, req.tp_block_size * req.tp_block_nr,
+		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* mmap() reports failure with MAP_FAILED, never with NULL */
+	if (ring == MAP_FAILED) {
+		perror("packetsock ring mmap");
+		exit(1);
+	}
+
+	return ring;
+}
+
+/* Count the leading frames of @ring whose status has TP_STATUS_USER set.
+ * Frames are one page apart; counting stops at the first frame without the
+ * bit or after RING_NUM_FRAMES frames.  @fd is unused. */
+static int sock_fanout_read_ring(int fd, void *ring)
+{
+	const size_t frame_size = getpagesize();
+	char *base = ring;
+	int count;
+
+	for (count = 0; count < RING_NUM_FRAMES; count++) {
+		struct tpacket2_hdr *header;
+
+		header = (struct tpacket2_hdr *) (base + count * frame_size);
+		if (!(header->tp_status & TP_STATUS_USER))
+			break;
+	}
+
+	return count;
+}
+
+/* Compare the number of queued frames on both rings with @expect.
+ * The two counts may match @expect in either order.
+ * @return 0 on a match, 1 (after printing an error) otherwise. */
+static int sock_fanout_read(int fds[], char *rings[], const int expect[])
+{
+	int seen[2];
+	int i, in_order, swapped;
+
+	for (i = 0; i < 2; i++)
+		seen[i] = sock_fanout_read_ring(fds[i], rings[i]);
+
+	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
+			seen[0], seen[1], expect[0], expect[1]);
+
+	in_order = seen[0] == expect[0] && seen[1] == expect[1];
+	swapped = seen[0] == expect[1] && seen[1] == expect[0];
+	if (in_order || swapped)
+		return 0;
+
+	fprintf(stderr, "ERROR: incorrect queue lengths\n");
+	return 1;
+}
+
+/* Control test: combining the ROLLOVER mode with the ROLLOVER flag must be
+ * refused; the program exits if such a socket can be opened. */
+static void test_control_single(void)
+{
+	int fd;
+
+	fprintf(stderr, "test: control single socket\n");
+
+	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
+			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
+	if (fd == -1)
+		return;
+
+	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
+	exit(1);
+}
+
+/* Exit with @msg if a socket can join the existing group using @typeflags */
+static void expect_join_refused(uint16_t typeflags, const char *msg)
+{
+	if (sock_fanout_open(typeflags, 10) == -1)
+		return;
+
+	fputs(msg, stderr);
+	exit(1);
+}
+
+/* Control test: once a HASH group exists, sockets asking for different
+ * flags or a different mode must be refused, while a second plain HASH
+ * socket must be able to join. */
+static void test_control_group(void)
+{
+	int first, second;
+
+	fprintf(stderr, "test: control multiple sockets\n");
+
+	first = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	if (first == -1) {
+		fprintf(stderr, "ERROR: failed to open HASH socket\n");
+		exit(1);
+	}
+
+	expect_join_refused(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG,
+			    "ERROR: joined group with wrong flag defrag\n");
+	expect_join_refused(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+			    "ERROR: joined group with wrong flag ro\n");
+	expect_join_refused(PACKET_FANOUT_CPU,
+			    "ERROR: joined group with wrong mode\n");
+
+	second = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	if (second == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+	if (close(second) || close(first)) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
+/* Datapath test for one fanout mode.
+ * @typeflags: fanout type and flags passed to sock_fanout_open()
+ * @port_off:  offset from PORT_BASE used for the second UDP socket pair
+ * @expect1:   expected ring counts after the first two sends
+ * @expect2:   expected ring counts after the additional send
+ * @return 0 if both checks matched, non-zero otherwise; setup or teardown
+ *         failures terminate the program. */
+static int test_datapath(uint16_t typeflags, int port_off,
+			 const int expect1[], const int expect2[])
+{
+	const int expect0[] = { 0, 0 };
+	char *rings[2];
+	int fds[2], fds_udp[2][2], ret;
+
+	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+
+	fds[0] = sock_fanout_open(typeflags, 20);
+	fds[1] = sock_fanout_open(typeflags, 20);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+	rings[0] = sock_fanout_open_ring(fds[0]);
+	rings[1] = sock_fanout_open_ring(fds[1]);
+	pair_udp_open(fds_udp[0], PORT_BASE);
+	pair_udp_open(fds_udp[1], PORT_BASE + port_off);
+	/* Result of the empty-ring check is printed but not added to ret */
+	sock_fanout_read(fds, rings, expect0);
+
+	/* Send data, but not enough to overflow a queue */
+	pair_udp_send(fds_udp[0], 15);
+	pair_udp_send(fds_udp[1], 5);
+	ret = sock_fanout_read(fds, rings, expect1);
+
+	/* Send more data, overflow the queue */
+	pair_udp_send(fds_udp[0], 15);
+	/* TODO: ensure consistent order between expect1 and expect2 */
+	ret |= sock_fanout_read(fds, rings, expect2);
+
+	/* Length matches the mapping made in sock_fanout_open_ring() */
+	if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
+	    munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
+		fprintf(stderr, "close rings\n");
+		exit(1);
+	}
+	if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
+	    close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
+	    close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "close datapath\n");
+		exit(1);
+	}
+
+	return ret;
+}
+
+/* Pin the calling process to CPU @cpuid.
+ * @return 0 on success, 1 if sched_setaffinity() failed with EINVAL; any
+ *         other failure terminates the program. */
+static int set_cpuaffinity(int cpuid)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpuid, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
+		return 0;
+
+	if (errno != EINVAL) {
+		fprintf(stderr, "setaffinity %d\n", cpuid);
+		exit(1);
+	}
+	return 1;
+}
+
+/* Run the control tests, then the datapath test for each fanout mode.
+ * Returns 0 and prints an OK line only if every datapath test matched. */
+int main(int argc, char **argv)
+{
+	/* For each table, row [0] is passed to test_datapath() as expect1
+	 * and row [1] as expect2. */
+	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
+	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
+	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
+	const int expect_rb[2][2]	= { { 20, 0 },  { 20, 15 } };
+	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
+	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
+	int port_off = 2, tries = 5, ret;
+
+	test_control_single();
+	test_control_group();
+
+	/* find a set of ports that do not collide onto the same socket */
+	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
+			    expect_hash[0], expect_hash[1]);
+	while (ret && tries--) {
+		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
+		ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
+				    expect_hash[0], expect_hash[1]);
+	}
+
+	/* The remaining modes reuse the port offset settled on above */
+	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+			     port_off, expect_hash_rb[0], expect_hash_rb[1]);
+	ret |= test_datapath(PACKET_FANOUT_LB,
+			     port_off, expect_lb[0], expect_lb[1]);
+	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
+			     port_off, expect_rb[0], expect_rb[1]);
+
+	/* The return value of this first pinning attempt is not checked */
+	set_cpuaffinity(0);
+	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+			     expect_cpu0[0], expect_cpu0[1]);
+	/* Skipped when pinning to CPU 1 is rejected with EINVAL */
+	if (!set_cpuaffinity(1))
+		/* TODO: test that choice alternates with previous */
+		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+				     expect_cpu1[0], expect_cpu1[1]);
+
+	if (ret)
+		return 1;
+
+	printf("OK. All tests passed\n");
+	return 0;
+}
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@ -0,0 +1,127 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn <willemb@google.com>
+ *         Daniel Borkmann <dborkman@redhat.com>
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PSOCK_LIB_H
+#define PSOCK_LIB_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#define DATA_LEN			100
+#define DATA_CHAR			'a'
+
+#define PORT_BASE			8000
+
+#ifndef __maybe_unused
+# define __maybe_unused		__attribute__ ((__unused__))
+#endif
+
+/*
+ * Attach a classic BPF socket filter to @fd.
+ *
+ * The program accepts a packet only if its length is at least DATA_LEN
+ * and the bytes at absolute offsets 0x50 and 0x51 both equal DATA_CHAR;
+ * every failed comparison jumps to the final "RET 0" (no match).
+ * A setsockopt() failure is reported with perror() and ends the process.
+ */
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+	struct sock_filter bpf_filter[] = {
+		{ 0x80, 0, 0, 0x00000000 },  /* LD  pktlen		      */
+		{ 0x35, 0, 5, DATA_LEN   },  /* JGE DATA_LEN  [f goto nomatch]*/
+		{ 0x30, 0, 0, 0x00000050 },  /* LD  ip[80]		      */
+		{ 0x15, 0, 3, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
+		{ 0x30, 0, 0, 0x00000051 },  /* LD  ip[81]		      */
+		{ 0x15, 0, 1, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
+		{ 0x06, 0, 0, 0x00000060 },  /* RET match	              */
+		{ 0x06, 0, 0, 0x00000000 },  /* RET no match		      */
+	};
+	struct sock_fprog bpf_prog;
+
+	bpf_prog.filter = bpf_filter;
+	/* Instruction count, derived from the table so the two stay in sync. */
+	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+		       sizeof(bpf_prog))) {
+		perror("setsockopt SO_ATTACH_FILTER");
+		exit(1);
+	}
+}
+
+/*
+ * Create a pair of loopback UDP sockets.
+ *
+ * fds[0] is bound to 127.0.0.1:@port and connected to fds[1], which is
+ * bound to 127.0.0.1:@port + 1.  Any failure is reported on stderr and
+ * ends the process with exit status 1.
+ */
+static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
+{
+	struct sockaddr_in src = {
+		.sin_family = AF_INET,
+		.sin_port = htons(port),
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	struct sockaddr_in dst = {
+		.sin_family = AF_INET,
+		.sin_port = htons(port + 1),
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+
+	fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
+	fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: socket dgram\n");
+		exit(1);
+	}
+
+	/* must bind both to get consistent hash result */
+	if (bind(fds[1], (struct sockaddr *) &dst, sizeof(dst)) != 0) {
+		perror("bind");
+		exit(1);
+	}
+
+	if (bind(fds[0], (struct sockaddr *) &src, sizeof(src)) != 0) {
+		perror("bind");
+		exit(1);
+	}
+
+	if (connect(fds[0], (struct sockaddr *) &dst, sizeof(dst)) != 0) {
+		perror("connect");
+		exit(1);
+	}
+}
+
+/*
+ * Push @num datagrams of DATA_LEN bytes, each filled with DATA_CHAR,
+ * from fds[0] to fds[1] and verify every one arrives intact.  A short
+ * write, short read or payload mismatch ends the process.
+ */
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+	char out[DATA_LEN], in[DATA_LEN];
+	ssize_t n;
+
+	memset(out, DATA_CHAR, sizeof(out));
+
+	while (num-- != 0) {
+		/* Should really handle EINTR and EAGAIN */
+		n = write(fds[0], out, sizeof(out));
+		if (n != sizeof(out)) {
+			fprintf(stderr, "ERROR: send failed left=%d\n", num);
+			exit(1);
+		}
+
+		n = read(fds[1], in, sizeof(in));
+		if (n != sizeof(in)) {
+			fprintf(stderr, "ERROR: recv failed left=%d\n", num);
+			exit(1);
+		}
+
+		if (memcmp(out, in, sizeof(out)) != 0) {
+			fprintf(stderr, "ERROR: data failed left=%d\n", num);
+			exit(1);
+		}
+	}
+}
+
+/* Close both sockets of a pair; close() results are not checked. */
+static __maybe_unused void pair_udp_close(int fds[])
+{
+	close(fds[0]);
+	close(fds[1]);
+}
+
+#endif /* PSOCK_LIB_H */
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@ -0,0 +1,805 @@
+/*
+ * Copyright 2013 Red Hat, Inc.
+ * Author: Daniel Borkmann <dborkman@redhat.com>
+ *         Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
+ *
+ * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
+ *
+ * Control:
+ *   Test the setup of the TPACKET socket with different patterns that are
+ *   known to fail (TODO) resp. succeed (OK).
+ *
+ * Datapath:
+ *   Open a pair of packet sockets and send resp. receive an a priori known
+ *   packet pattern across the sockets and check if it was received resp.
+ *   sent correctly. Fanout in combination with RX_RING is currently not
+ *   tested here.
+ *
+ *   The test currently runs for
+ *   - TPACKET_V1: RX_RING, TX_RING
+ *   - TPACKET_V2: RX_RING, TX_RING
+ *   - TPACKET_V3: RX_RING
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if_packet.h>
+#include <linux/filter.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <net/if.h>
+#include <inttypes.h>
+#include <poll.h>
+
+#include "psock_lib.h"
+
+#ifndef bug_on
+# define bug_on(cond)		assert(!(cond))
+#endif
+
+#ifndef __aligned_tpacket
+# define __aligned_tpacket	__attribute__((aligned(TPACKET_ALIGNMENT)))
+#endif
+
+#ifndef __align_tpacket
+# define __align_tpacket(x)	__attribute__((aligned(TPACKET_ALIGN(x))))
+#endif
+
+#define NUM_PACKETS		100
+#define ALIGN_8(x)		(((x) + 8 - 1) & ~(8 - 1))
+
+/* Book-keeping for one mmap()ed packet ring under test. */
+struct ring {
+	struct iovec *rd;		/* one entry per frame (V1/V2) or block (V3) */
+	uint8_t *mm_space;		/* base address returned by mmap() */
+	size_t mm_len, rd_len;		/* bytes mapped / bytes allocated for rd[] */
+	struct sockaddr_ll ll;		/* address the packet socket is bound to */
+	void (*walk)(int sock, struct ring *ring);	/* version-specific test body */
+	int type, rd_num, flen, version; /* ring type, rd[] entries, bytes per entry, TPACKET version */
+	union {
+		struct tpacket_req  req;	/* request used for TPACKET_V1/V2 */
+		struct tpacket_req3 req3;	/* request used for TPACKET_V3 */
+	};
+};
+
+/*
+ * View of the start of a TPACKET_V3 block as used by this test; only the
+ * h1 member is read or written by the code in this file.
+ */
+struct block_desc {
+	uint32_t version;
+	uint32_t offset_to_priv;
+	struct tpacket_hdr_v1 h1;
+};
+
+/*
+ * Typed views onto one ring frame: the same address can be read as a
+ * TPACKET_V1 frame (v1), a TPACKET_V2 frame (v2) or an untyped pointer
+ * (raw).  Each view is a header followed by an aligned sockaddr_ll.
+ */
+union frame_map {
+	struct {
+		struct tpacket_hdr tp_h __aligned_tpacket;
+		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
+	} *v1;
+	struct {
+		struct tpacket2_hdr tp_h __aligned_tpacket;
+		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
+	} *v2;
+	void *raw;
+};
+
+static unsigned int total_packets, total_bytes;
+
+/*
+ * Open a raw packet socket for all protocols and select TPACKET
+ * version @ver on it.  Returns the socket; exits on any failure.
+ */
+static int pfsocket(int ver)
+{
+	int fd;
+
+	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
+		       &ver, sizeof(ver)) == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	return fd;
+}
+
+/* Emit one progress dot on stderr whenever total_packets is a multiple of 10. */
+static void status_bar_update(void)
+{
+	if (total_packets % 10 != 0)
+		return;
+
+	fputs(".", stderr);
+	fflush(stderr);
+}
+
+/*
+ * Sanity-check a captured frame: it must be at least as long as an
+ * Ethernet header and carry the ETH_P_IP ethertype.  Exits on mismatch.
+ */
+static void test_payload(void *pay, size_t len)
+{
+	const struct ethhdr *hdr = pay;
+
+	if (len < sizeof(*hdr)) {
+		fprintf(stderr,
+			"test_payload: packet too small: %zu bytes!\n", len);
+		exit(1);
+	}
+
+	if (hdr->h_proto == htons(ETH_P_IP))
+		return;
+
+	fprintf(stderr, "test_payload: wrong ethernet type: 0x%x!\n",
+		ntohs(hdr->h_proto));
+	exit(1);
+}
+
+/*
+ * Build the frame that the TX tests transmit.
+ *
+ * @pay: buffer to fill; the caller must provide at least DATA_LEN + 42 bytes
+ * @len: receives the frame length (always DATA_LEN + 42)
+ *
+ * The frame is an Ethernet header followed by an IPv4 header and
+ * DATA_LEN bytes of DATA_CHAR.
+ * NOTE(review): only sizeof(*eth) + sizeof(*ip) + DATA_LEN bytes are
+ * written; if that is less than *len (presumably 14 + 20 + DATA_LEN), the
+ * trailing bytes keep whatever the caller's buffer held. Confirm this is
+ * intended.
+ */
+static void create_payload(void *pay, size_t *len)
+{
+	int i;
+	struct ethhdr *eth = pay;
+	struct iphdr *ip = pay + sizeof(*eth);
+
+	/* Lets create some broken crap, that still passes
+	 * our BPF filter.
+	 */
+
+	*len = DATA_LEN + 42;
+
+	/* The first 2 * ETH_ALEN bytes (the two MAC addresses) become 0xff. */
+	memset(pay, 0xff, ETH_ALEN * 2);
+	eth->h_proto = htons(ETH_P_IP);
+
+	/* Randomise the whole IP header, then pin the fields set below. */
+	for (i = 0; i < sizeof(*ip); ++i)
+		((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
+
+	ip->ihl = 5;
+	ip->version = 4;
+	ip->protocol = 0x11;
+	ip->frag_off = 0;
+	ip->ttl = 64;
+	ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
+
+	ip->saddr = htonl(INADDR_LOOPBACK);
+	ip->daddr = htonl(INADDR_LOOPBACK);
+
+	/* Payload bytes that pair_udp_setfilter()'s BPF program matches on. */
+	memset(pay + sizeof(*eth) + sizeof(*ip),
+	       DATA_CHAR, DATA_LEN);
+}
+
+/* Non-zero when every TP_STATUS_USER bit is set in a V1 frame's tp_status. */
+static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
+{
+	return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+/*
+ * Return a V1 RX frame to the kernel: store TP_STATUS_KERNEL in its
+ * status word, then issue a full memory barrier.
+ */
+static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+/* Non-zero when every TP_STATUS_USER bit is set in a V2 frame's tp_status. */
+static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+	return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+/*
+ * Return a V2 RX frame to the kernel: store TP_STATUS_KERNEL in its
+ * status word, then issue a full memory barrier.
+ */
+static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+/*
+ * Version dispatcher for the RX "frame is ready" check.  @base is the
+ * start of the frame; any @version other than V1/V2 trips bug_on().
+ */
+static inline int __v1_v2_rx_kernel_ready(void *base, int version)
+{
+	if (version == TPACKET_V1)
+		return __v1_rx_kernel_ready(base);
+
+	if (version == TPACKET_V2)
+		return __v2_rx_kernel_ready(base);
+
+	bug_on(1);
+	return 0;
+}
+
+/*
+ * Version dispatcher that returns the RX frame at @base to the kernel.
+ * An unsupported @version trips bug_on(), matching
+ * __v1_v2_rx_kernel_ready(), instead of silently leaving the frame
+ * untouched.
+ */
+static inline void __v1_v2_rx_user_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		__v1_rx_user_ready(base);
+		break;
+	case TPACKET_V2:
+		__v2_rx_user_ready(base);
+		break;
+	default:
+		bug_on(1);
+		break;
+	}
+}
+
+/*
+ * RX test body for TPACKET_V1/V2.
+ *
+ * Sends NUM_PACKETS datagrams over a loopback UDP pair and consumes
+ * frames from the mapped RX ring until total_packets reaches
+ * NUM_PACKETS * 2, validating each frame with test_payload().
+ * (Presumably every datagram is seen twice on loopback, which would
+ * explain the factor of two and the halved byte count printed at the
+ * end. Confirm.)
+ *
+ * @sock: packet socket owning the ring
+ * @ring: ring descriptor; ring->rd[] must already point at the frames
+ */
+static void walk_v1_v2_rx(int sock, struct ring *ring)
+{
+	struct pollfd pfd;
+	int udp_sock[2];
+	union frame_map ppd;
+	unsigned int frame_num = 0;
+
+	bug_on(ring->type != PACKET_RX_RING);
+
+	pair_udp_open(udp_sock, PORT_BASE);
+	pair_udp_setfilter(sock);
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLIN | POLLERR;
+	pfd.revents = 0;
+
+	pair_udp_send(udp_sock, NUM_PACKETS);
+
+	while (total_packets < NUM_PACKETS * 2) {
+		/* Drain every consecutive frame that is ready for user space. */
+		while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
+					       ring->version)) {
+			ppd.raw = ring->rd[frame_num].iov_base;
+
+			switch (ring->version) {
+			case TPACKET_V1:
+				test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
+					     ppd.v1->tp_h.tp_snaplen);
+				total_bytes += ppd.v1->tp_h.tp_snaplen;
+				break;
+
+			case TPACKET_V2:
+				test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
+					     ppd.v2->tp_h.tp_snaplen);
+				total_bytes += ppd.v2->tp_h.tp_snaplen;
+				break;
+			}
+
+			status_bar_update();
+			total_packets++;
+
+			/* Give the frame back before moving to the next slot. */
+			__v1_v2_rx_user_ready(ppd.raw, ring->version);
+
+			frame_num = (frame_num + 1) % ring->rd_num;
+		}
+
+		/* Nothing ready: wait up to 1 ms; the poll() result is not checked. */
+		poll(&pfd, 1, 1);
+	}
+
+	pair_udp_close(udp_sock);
+
+	/*
+	 * NOTE(review): the check expects 2 * NUM_PACKETS but the message
+	 * reports NUM_PACKETS as the target. Confirm which is intended.
+	 */
+	if (total_packets != 2 * NUM_PACKETS) {
+		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+			ring->version, total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+/*
+ * Non-zero when a V1 TX frame has neither TP_STATUS_SEND_REQUEST nor
+ * TP_STATUS_SENDING set; walk_v1_v2_tx() fills frames only in that state.
+ */
+static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+/*
+ * Queue a filled V1 TX frame: store TP_STATUS_SEND_REQUEST in its status
+ * word, then issue a full memory barrier.
+ */
+static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+/*
+ * Non-zero when a V2 TX frame has neither TP_STATUS_SEND_REQUEST nor
+ * TP_STATUS_SENDING set; walk_v1_v2_tx() fills frames only in that state.
+ */
+static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+/*
+ * Queue a filled V2 TX frame: store TP_STATUS_SEND_REQUEST in its status
+ * word, then issue a full memory barrier.
+ */
+static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+/*
+ * Version dispatcher for the TX "frame may be filled" check.  @base is
+ * the start of the frame; any @version other than V1/V2 trips bug_on().
+ */
+static inline int __v1_v2_tx_kernel_ready(void *base, int version)
+{
+	if (version == TPACKET_V1)
+		return __v1_tx_kernel_ready(base);
+
+	if (version == TPACKET_V2)
+		return __v2_tx_kernel_ready(base);
+
+	bug_on(1);
+	return 0;
+}
+
+/*
+ * Version dispatcher that queues the filled TX frame at @base for
+ * transmission.  An unsupported @version trips bug_on(), matching
+ * __v1_v2_tx_kernel_ready(), instead of silently doing nothing.
+ */
+static inline void __v1_v2_tx_user_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		__v1_tx_user_ready(base);
+		break;
+	case TPACKET_V2:
+		__v2_tx_user_ready(base);
+		break;
+	default:
+		bug_on(1);
+		break;
+	}
+}
+
+/* Enable the PACKET_LOSS socket option (value 1) on @sock; exits on failure. */
+static void __v1_v2_set_packet_loss_discard(int sock)
+{
+	int discard = 1;
+
+	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS,
+		       (void *) &discard, sizeof(discard)) != -1)
+		return;
+
+	perror("setsockopt");
+	exit(1);
+}
+
+/*
+ * TX test body for TPACKET_V1/V2.
+ *
+ * Fills NUM_PACKETS frames of the mapped TX ring with the frame built by
+ * create_payload(), kicks transmission with sendto(), and then reads
+ * NUM_PACKETS frames back from a second packet socket bound to "lo",
+ * validating each with test_payload().
+ *
+ * @sock: packet socket owning the TX ring
+ * @ring: ring descriptor; ring->rd[] must already point at the frames
+ *
+ * Any failure is reported on stderr and ends the process.
+ */
+static void walk_v1_v2_tx(int sock, struct ring *ring)
+{
+	struct pollfd pfd;
+	int rcv_sock, ret;
+	size_t packet_len;
+	union frame_map ppd;
+	char packet[1024];
+	unsigned int frame_num = 0, got = 0;
+	struct sockaddr_ll ll = {
+		.sll_family = PF_PACKET,
+		.sll_halen = ETH_ALEN,
+	};
+
+	bug_on(ring->type != PACKET_TX_RING);
+	bug_on(ring->rd_num < NUM_PACKETS);
+
+	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (rcv_sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	pair_udp_setfilter(rcv_sock);
+
+	/* if_nametoindex() returns 0 when the interface cannot be resolved. */
+	ll.sll_ifindex = if_nametoindex("lo");
+	if (ll.sll_ifindex == 0) {
+		perror("if_nametoindex");
+		exit(1);
+	}
+
+	ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
+	if (ret == -1) {
+		perror("bind");
+		exit(1);
+	}
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLOUT | POLLERR;
+	pfd.revents = 0;
+
+	/* total_packets counts down while filling, then up while receiving. */
+	total_packets = NUM_PACKETS;
+	create_payload(packet, &packet_len);
+
+	while (total_packets > 0) {
+		while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
+					       ring->version) &&
+		       total_packets > 0) {
+			ppd.raw = ring->rd[frame_num].iov_base;
+
+			switch (ring->version) {
+			case TPACKET_V1:
+				ppd.v1->tp_h.tp_snaplen = packet_len;
+				ppd.v1->tp_h.tp_len = packet_len;
+
+				memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += ppd.v1->tp_h.tp_snaplen;
+				break;
+
+			case TPACKET_V2:
+				ppd.v2->tp_h.tp_snaplen = packet_len;
+				ppd.v2->tp_h.tp_len = packet_len;
+
+				memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += ppd.v2->tp_h.tp_snaplen;
+				break;
+			}
+
+			status_bar_update();
+			total_packets--;
+
+			/* Mark the frame as a send request before advancing. */
+			__v1_v2_tx_user_ready(ppd.raw, ring->version);
+
+			frame_num = (frame_num + 1) % ring->rd_num;
+		}
+
+		/* Wait up to 1 ms; the poll() result is not checked. */
+		poll(&pfd, 1, 1);
+	}
+
+	bug_on(total_packets != 0);
+
+	/* A zero-length sendto() on the ring socket starts transmission. */
+	ret = sendto(sock, NULL, 0, 0, NULL, 0);
+	if (ret == -1) {
+		perror("sendto");
+		exit(1);
+	}
+
+	/*
+	 * Test the packet budget before calling recvfrom(): once all
+	 * NUM_PACKETS frames are counted, another blocking receive would
+	 * only wait for traffic the test does not need.
+	 */
+	while (total_packets < NUM_PACKETS &&
+	       (ret = recvfrom(rcv_sock, packet, sizeof(packet),
+			       0, NULL, NULL)) > 0) {
+		got += ret;
+		test_payload(packet, ret);
+
+		status_bar_update();
+		total_packets++;
+	}
+
+	close(rcv_sock);
+
+	if (total_packets != NUM_PACKETS) {
+		fprintf(stderr, "walk_v%d_tx: received %u out of %u pkts\n",
+			ring->version, total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
+}
+
+/* ring->walk callback for TPACKET_V1/V2: RX rings go to the RX walker, all others to TX. */
+static void walk_v1_v2(int sock, struct ring *ring)
+{
+	if (ring->type != PACKET_RX_RING) {
+		walk_v1_v2_tx(sock, ring);
+		return;
+	}
+
+	walk_v1_v2_rx(sock, ring);
+}
+
+static uint64_t __v3_prev_block_seq_num = 0;
+
+/*
+ * Verify that @pbd carries the sequence number following the previously
+ * seen block, then remember it.  A gap is reported and ends the process.
+ * File-local like the other helpers, hence static.
+ */
+static void __v3_test_block_seq_num(struct block_desc *pbd)
+{
+	if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
+		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
+			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
+			__v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
+			(uint64_t) pbd->h1.seq_num);
+		exit(1);
+	}
+
+	__v3_prev_block_seq_num = pbd->h1.seq_num;
+}
+
+/*
+ * Compare the byte count computed while walking a block (@bytes) with the
+ * blk_len recorded in the block header.  Blocks with no packets are not
+ * checked.  A mismatch is reported and ends the process.
+ */
+static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
+{
+	if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
+		fprintf(stderr, "\nblock:%u with %upackets, expected "
+			"len:%u != actual len:%u\n", block_num,
+			pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
+		exit(1);
+	}
+}
+
+/*
+ * Validate a block header before its packets are walked: the block must
+ * have TP_STATUS_USER set and carry the next expected sequence number.
+ */
+static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
+{
+	if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
+		exit(1);
+	}
+
+	__v3_test_block_seq_num(pbd);
+}
+
+/*
+ * Walk every packet of one TPACKET_V3 block.
+ *
+ * Validates the block header, checks each packet with test_payload(),
+ * bumps total_packets per packet, verifies the accumulated length
+ * against the header's blk_len and adds the captured bytes to
+ * total_bytes.
+ */
+static void __v3_walk_block(struct block_desc *pbd, const int block_num)
+{
+	int num_pkts = pbd->h1.num_pkts, i;
+	/* The padded total starts with the 8-byte aligned descriptor size. */
+	unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
+	struct tpacket3_hdr *ppd;
+
+	__v3_test_block_header(pbd, block_num);
+
+	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+				       pbd->h1.offset_to_first_pkt);
+
+	for (i = 0; i < num_pkts; ++i) {
+		bytes += ppd->tp_snaplen;
+
+		/*
+		 * A zero tp_next_offset (presumably the last packet of the
+		 * block) is accounted for by its aligned mac offset + length.
+		 */
+		if (ppd->tp_next_offset)
+			bytes_with_padding += ppd->tp_next_offset;
+		else
+			bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
+
+		test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+		status_bar_update();
+		total_packets++;
+
+		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
+		__sync_synchronize();
+	}
+
+	__v3_test_block_len(pbd, bytes_with_padding, block_num);
+	total_bytes += bytes;
+}
+
+/*
+ * Return a consumed block to the kernel: store TP_STATUS_KERNEL in its
+ * status word, then issue a full memory barrier.  File-local like the
+ * other helpers, hence static.
+ */
+static void __v3_flush_block(struct block_desc *pbd)
+{
+	pbd->h1.block_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+/*
+ * RX test body for TPACKET_V3.
+ *
+ * Sends NUM_PACKETS datagrams over a loopback UDP pair, then consumes
+ * ring blocks in order until total_packets reaches NUM_PACKETS * 2.
+ * (Presumably every datagram is seen twice on loopback. Confirm.)
+ *
+ * @sock: packet socket owning the ring
+ * @ring: ring descriptor; ring->rd[] must already point at the blocks
+ */
+static void walk_v3_rx(int sock, struct ring *ring)
+{
+	unsigned int block_num = 0;
+	struct pollfd pfd;
+	struct block_desc *pbd;
+	int udp_sock[2];
+
+	bug_on(ring->type != PACKET_RX_RING);
+
+	pair_udp_open(udp_sock, PORT_BASE);
+	pair_udp_setfilter(sock);
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLIN | POLLERR;
+	pfd.revents = 0;
+
+	pair_udp_send(udp_sock, NUM_PACKETS);
+
+	while (total_packets < NUM_PACKETS * 2) {
+		pbd = (struct block_desc *) ring->rd[block_num].iov_base;
+
+		/* Wait, in 1 ms poll() steps, until the block has TP_STATUS_USER set. */
+		while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
+			poll(&pfd, 1, 1);
+
+		__v3_walk_block(pbd, block_num);
+		__v3_flush_block(pbd);
+
+		block_num = (block_num + 1) % ring->rd_num;
+	}
+
+	pair_udp_close(udp_sock);
+
+	/*
+	 * NOTE(review): the check expects 2 * NUM_PACKETS but the message
+	 * reports NUM_PACKETS as the target. Confirm which is intended.
+	 */
+	if (total_packets != 2 * NUM_PACKETS) {
+		fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
+			total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+/* ring->walk callback for TPACKET_V3: only RX rings are handled; anything else trips bug_on(). */
+static void walk_v3(int sock, struct ring *ring)
+{
+	if (ring->type == PACKET_RX_RING)
+		walk_v3_rx(sock, ring);
+	else
+		bug_on(1);
+}
+
+/*
+ * Fill in the TPACKET_V1/V2 ring request and the matching bookkeeping in
+ * @ring for @blocks blocks.  Each block is four pages and each frame is
+ * TPACKET_ALIGNMENT << 7 bytes; rd[] gets one entry per frame.
+ */
+static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
+{
+	ring->req.tp_block_size = getpagesize() << 2;
+	ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
+	ring->req.tp_block_nr = blocks;
+
+	/* frames per block * number of blocks */
+	ring->req.tp_frame_nr = ring->req.tp_block_size /
+				ring->req.tp_frame_size *
+				ring->req.tp_block_nr;
+
+	ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
+	ring->walk = walk_v1_v2;
+	ring->rd_num = ring->req.tp_frame_nr;
+	ring->flen = ring->req.tp_frame_size;
+}
+
+/*
+ * Fill in the TPACKET_V3 ring request and the matching bookkeeping in
+ * @ring for @blocks blocks.  Unlike V1/V2, rd[] gets one entry per block
+ * and flen is the block size.
+ */
+static void __v3_fill(struct ring *ring, unsigned int blocks)
+{
+	ring->req3.tp_retire_blk_tov = 64;
+	ring->req3.tp_sizeof_priv = 0;
+	ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	ring->req3.tp_block_size = getpagesize() << 2;
+	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
+	ring->req3.tp_block_nr = blocks;
+
+	/* frames per block * number of blocks */
+	ring->req3.tp_frame_nr = ring->req3.tp_block_size /
+				 ring->req3.tp_frame_size *
+				 ring->req3.tp_block_nr;
+
+	ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
+	ring->walk = walk_v3;
+	ring->rd_num = ring->req3.tp_block_nr;
+	ring->flen = ring->req3.tp_block_size;
+}
+
+/*
+ * Configure a ring of the given @version and @type on @sock.
+ *
+ * Fills @ring via the version-specific helper, submits the request with
+ * setsockopt(), allocates the rd[] descriptor array and resets the global
+ * packet/byte counters.  Exits on setsockopt() or malloc() failure.
+ */
+static void setup_ring(int sock, struct ring *ring, int version, int type)
+{
+	int ret = 0;
+	unsigned int blocks = 256;
+
+	ring->type = type;
+	ring->version = version;
+
+	switch (version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		/* TX rings additionally get the PACKET_LOSS option enabled. */
+		if (type == PACKET_TX_RING)
+			__v1_v2_set_packet_loss_discard(sock);
+		__v1_v2_fill(ring, blocks);
+		ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
+				 sizeof(ring->req));
+		break;
+
+	case TPACKET_V3:
+		__v3_fill(ring, blocks);
+		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
+				 sizeof(ring->req3));
+		break;
+	}
+
+	if (ret == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	/* rd[] is only allocated here; mmap_ring() fills it in. */
+	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
+	ring->rd = malloc(ring->rd_len);
+	if (ring->rd == NULL) {
+		perror("malloc");
+		exit(1);
+	}
+
+	total_packets = 0;
+	total_bytes = 0;
+}
+
+/*
+ * Map the ring of @sock into the process and point each rd[] entry at
+ * its slice of the mapping (entry i starts at i * flen).  Exits if
+ * mmap() fails.
+ */
+static void mmap_ring(int sock, struct ring *ring)
+{
+	int i;
+
+	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
+			      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
+	if (ring->mm_space == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memset(ring->rd, 0, ring->rd_len);
+	for (i = 0; i < ring->rd_num; ++i) {
+		ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
+		ring->rd[i].iov_len = ring->flen;
+	}
+}
+
+/*
+ * Bind the ring socket @sock to the "lo" interface for all protocols,
+ * recording the address in ring->ll.  Exits if the interface cannot be
+ * resolved or bind() fails.
+ */
+static void bind_ring(int sock, struct ring *ring)
+{
+	int ret;
+
+	ring->ll.sll_family = PF_PACKET;
+	ring->ll.sll_protocol = htons(ETH_P_ALL);
+	ring->ll.sll_ifindex = if_nametoindex("lo");
+	ring->ll.sll_hatype = 0;
+	ring->ll.sll_pkttype = 0;
+	ring->ll.sll_halen = 0;
+
+	/* if_nametoindex() returns 0 when the interface cannot be resolved. */
+	if (ring->ll.sll_ifindex == 0) {
+		perror("if_nametoindex");
+		exit(1);
+	}
+
+	ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
+	if (ret == -1) {
+		perror("bind");
+		exit(1);
+	}
+}
+
+/* Run the version-specific test body stored in ring->walk. */
+static void walk_ring(int sock, struct ring *ring)
+{
+	ring->walk(sock, ring);
+}
+
+/*
+ * Release the ring mapping and the rd[] descriptor array.  @sock is not
+ * used, and the munmap() result is not checked.
+ */
+static void unmap_ring(int sock, struct ring *ring)
+{
+	munmap(ring->mm_space, ring->mm_len);
+	free(ring->rd);
+}
+
+/*
+ * Estimate the kernel's bit width from /proc/kallsyms.
+ *
+ * Counts the characters before the first whitespace in the file
+ * (presumably the hexadecimal address of the first symbol) and returns
+ * that count times four.  Exits if the file cannot be opened or read.
+ */
+static int test_kernel_bit_width(void)
+{
+	char in[512];
+	int num = 0, fd;
+	ssize_t ret;
+
+	fd = open("/proc/kallsyms", O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	ret = read(fd, in, sizeof(in));
+	if (ret <= 0) {
+		perror("read");
+		exit(1);
+	}
+
+	close(fd);
+
+	/*
+	 * Only the first 'ret' bytes of in[] are valid and the buffer is not
+	 * NUL-terminated, so bound the scan by what was actually read.  The
+	 * cast keeps isspace() defined for bytes with the high bit set.
+	 */
+	while (num < ret && !isspace((unsigned char) in[num]))
+		num++;
+
+	return num * 4;
+}
+
+/* Bit width of this user-space build, taken from __WORDSIZE. */
+static int test_user_bit_width(void)
+{
+	return __WORDSIZE;
+}
+
+/* Printable names indexed by TPACKET version constant. */
+static const char *tpacket_str[] = {
+	[TPACKET_V1] = "TPACKET_V1",
+	[TPACKET_V2] = "TPACKET_V2",
+	[TPACKET_V3] = "TPACKET_V3",
+};
+
+/* Printable names indexed by ring type socket option. */
+static const char *type_str[] = {
+	[PACKET_RX_RING] = "PACKET_RX_RING",
+	[PACKET_TX_RING] = "PACKET_TX_RING",
+};
+
+/*
+ * Run one ring test for the given TPACKET @version and ring @type.
+ *
+ * Always returns 0; every failure inside the helpers ends the process.
+ * TPACKET_V1 is skipped when the kernel and user-space bit widths differ.
+ */
+static int test_tpacket(int version, int type)
+{
+	int sock;
+	struct ring ring;
+
+	fprintf(stderr, "test: %s with %s ", tpacket_str[version],
+		type_str[type]);
+	fflush(stderr);
+
+	if (version == TPACKET_V1 &&
+	    test_kernel_bit_width() != test_user_bit_width()) {
+		fprintf(stderr, "test: skip %s %s since user and kernel "
+			"space have different bit width\n",
+			tpacket_str[version], type_str[type]);
+		return 0;
+	}
+
+	/* Life cycle: socket -> ring request -> mmap -> bind -> test -> teardown. */
+	sock = pfsocket(version);
+	memset(&ring, 0, sizeof(ring));
+	setup_ring(sock, &ring, version, type);
+	mmap_ring(sock, &ring);
+	bind_ring(sock, &ring);
+	walk_ring(sock, &ring);
+	unmap_ring(sock, &ring);
+	close(sock);
+
+	fprintf(stderr, "\n");
+	return 0;
+}
+
+/*
+ * Run every supported version/ring-type combination.  Returns 1 if any
+ * test_tpacket() call returned non-zero, otherwise prints a success line
+ * and returns 0.
+ */
+int main(void)
+{
+	int ret = 0;
+
+	ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING);
+
+	ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
+
+	ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
+
+	if (ret)
+		return 1;
+
+	printf("OK. All tests passed\n");
+	return 0;
+}
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@ -0,0 +1,26 @@
+#!/bin/sh
+
+if [ $(id -u) != 0 ]; then
+	echo $msg must be run as root >&2
+	exit 0
+fi
+
+echo "--------------------"
+echo "running psock_fanout test"
+echo "--------------------"
+./psock_fanout
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running psock_tpacket test"
+echo "--------------------"
+./psock_tpacket
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@ -0,0 +1,12 @@
+#!/bin/bash
+
+echo "--------------------"
+echo "running socket test"
+echo "--------------------"
+./socket
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
+
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@ -0,0 +1,92 @@
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+/* One socket() invocation together with the outcome the test expects. */
+struct socket_testcase {
+	/* Arguments passed to socket() unchanged. */
+	int	domain;
+	int	type;
+	int	protocol;
+
+	/* 0    = valid file descriptor
+	 * -foo = error foo
+	 */
+	int	expect;
+
+	/* If non-zero, accept EAFNOSUPPORT to handle the case
+	 * of the protocol not being configured into the kernel.
+	 */
+	int	nosupport_ok;
+};
+
+/* Table of cases run by run_tests(): { domain, type, protocol, expect, nosupport_ok }. */
+static struct socket_testcase tests[] = {
+	{ AF_MAX,  0,           0,           -EAFNOSUPPORT,    0 },
+	{ AF_INET, SOCK_STREAM, IPPROTO_TCP, 0,                1  },
+	{ AF_INET, SOCK_DGRAM,  IPPROTO_TCP, -EPROTONOSUPPORT, 1  },
+	{ AF_INET, SOCK_DGRAM,  IPPROTO_UDP, 0,                1  },
+	{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1  },
+};
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define ERR_STRING_SZ	64
+
+/*
+ * Run every entry of tests[] through socket() and compare the outcome
+ * with the entry's expectation.
+ *
+ * Returns 0 when all cases behave as expected and -1 at the first
+ * mismatch, after describing it on stderr.
+ */
+static int run_tests(void)
+{
+	char err_string1[ERR_STRING_SZ];
+	char err_string2[ERR_STRING_SZ];
+	int i, err;
+
+	err = 0;
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct socket_testcase *s = &tests[i];
+		int fd, saved_errno;
+
+		fd = socket(s->domain, s->type, s->protocol);
+		if (fd < 0) {
+			/* Keep socket()'s errno safe from later library calls. */
+			saved_errno = errno;
+
+			if (s->nosupport_ok &&
+			    saved_errno == EAFNOSUPPORT)
+				continue;
+
+			if (s->expect < 0 &&
+			    saved_errno == -s->expect)
+				continue;
+
+			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+			strerror_r(saved_errno, err_string2, ERR_STRING_SZ);
+
+			fprintf(stderr, "socket(%d, %d, %d) expected "
+				"err (%s) got (%s)\n",
+				s->domain, s->type, s->protocol,
+				err_string1, err_string2);
+
+			err = -1;
+			break;
+		} else {
+			close(fd);
+
+			if (s->expect < 0) {
+				/*
+				 * socket() succeeded although an error was
+				 * expected; errno is meaningless here, so
+				 * report the error that should have occurred.
+				 */
+				strerror_r(-s->expect, err_string1,
+					   ERR_STRING_SZ);
+
+				fprintf(stderr, "socket(%d, %d, %d) expected "
+					"err (%s) got success\n",
+					s->domain, s->type, s->protocol,
+					err_string1);
+
+				err = -1;
+				break;
+			}
+		}
+	}
+
+	return err;
+}
+
+/* Program entry: the exit status is whatever run_tests() returns. */
+int main(void)
+{
+	return run_tests();
+}
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@ -0,0 +1,39 @@
+# Makefile for powerpc selftests
+
+# ARCH can be overridden by the user for cross compiling
+ARCH ?= $(shell uname -m)
+# Rewrite any machine name starting at "ppc" (e.g. ppc64) to "powerpc".
+ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
+
+# Compiler setup and the TARGETS list are only defined for powerpc builds.
+ifeq ($(ARCH),powerpc)
+
+# Embedded into the binaries through -DGIT_VERSION; "unknown" outside git.
+GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
+
+CC := $(CROSS_COMPILE)$(CC)
+CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+
+# Exported so the sub-makes started below inherit them.
+export CC CFLAGS
+
+TARGETS = pmu copyloops mm tm primitives
+
+endif
+
+all: $(TARGETS)
+
+# Build each sub-directory; -k keeps going after a failing target.
+$(TARGETS):
+	$(MAKE) -k -C $@ all
+
+run_tests: all
+	@for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET run_tests; \
+	done;
+
+clean:
+	@for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+	rm -f tags
+
+tags:
+	find . -name '*.c' -o -name '*.h' | xargs ctags
+
+.PHONY: all run_tests clean tags $(TARGETS)
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@ -0,0 +1,29 @@
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+
+# Use our CFLAGS for the implicit .S rule
+ASFLAGS = $(CFLAGS)
+
+# One test program per copy loop; all share the sources in EXTRA_SOURCES.
+PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+EXTRA_SOURCES := validate.c ../harness.c
+
+all: $(PROGS)
+
+# COPY_LOOP selects which test_* routine validate.c exercises.
+copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
+copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
+memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
+memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
+
+$(PROGS): $(EXTRA_SOURCES)
+
+# The leading '-' makes make ignore a non-zero status from the loop.
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS) *.o
+
+.PHONY: all run_tests clean
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@ -0,0 +1,89 @@
+#include <ppc-asm.h>
+
+#define CONFIG_ALTIVEC
+
+#define r1	1
+
+#define vr0     0
+#define vr1     1
+#define vr2     2
+#define vr3     3
+#define vr4     4
+#define vr5     5
+#define vr6     6
+#define vr7     7
+#define vr8     8
+#define vr9     9
+#define vr10    10
+#define vr11    11
+#define vr12    12
+#define vr13    13
+#define vr14    14
+#define vr15    15
+#define vr16    16
+#define vr17    17
+#define vr18    18
+#define vr19    19
+#define vr20    20
+#define vr21    21
+#define vr22    22
+#define vr23    23
+#define vr24    24
+#define vr25    25
+#define vr26    26
+#define vr27    27
+#define vr28    28
+#define vr29    29
+#define vr30    30
+#define vr31    31
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
+
+#define PPC_MTOCRF(A, B)	mtocrf A, B
+
+/*
+ * Minimal stand-ins for symbols the copy loops reference.  The routines
+ * under test are emitted with a test_ prefix (see _GLOBAL above), so the
+ * unprefixed names below are separate, trivial functions.
+ */
+
+/* Loads 1 into r3 and returns. */
+FUNC_START(enter_vmx_usercopy)
+	li	r3,1
+	blr
+
+/* Loads 0 into r3 and returns. */
+FUNC_START(exit_vmx_usercopy)
+	li	r3,0
+	blr
+
+/* Loads 1 into r3 and returns. */
+FUNC_START(enter_vmx_copy)
+	li	r3,1
+	blr
+
+/* Returns immediately. */
+FUNC_START(exit_vmx_copy)
+	blr
+
+/* Returns immediately. */
+FUNC_START(memcpy_power7)
+	blr
+
+/* Returns immediately. */
+FUNC_START(__copy_tofrom_user_power7)
+	blr
+
+/* Returns immediately. */
+FUNC_START(__copy_tofrom_user_base)
+	blr
+
+#define BEGIN_FTR_SECTION
+#define FTR_SECTION_ELSE
+#define ALT_FTR_SECTION_END_IFCLR(x)
+#define ALT_FTR_SECTION_END(x, y)
+#define END_FTR_SECTION_IFCLR(x)
--- a/tools/testing/selftests/powerpc/copyloops/asm/processor.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
--- a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_64.S
--- a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_power7.S
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_64.S
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_power7.S
--- a/tools/testing/selftests/powerpc/copyloops/validate.c
+++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@ -0,0 +1,99 @@
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "../utils.h"
+
+#define MAX_LEN 8192
+#define MAX_OFFSET 16
+#define MIN_REDZONE 128
+#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
+#define POISON 0xa5
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+
+/*
+ * Run COPY_LOOP once and verify the result.
+ *
+ * @src, @dst:  BUFLEN-byte work buffers, re-poisoned on every call
+ * @src_off:    offset of the source data past the leading red zone
+ * @dst_off:    offset of the destination past the leading red zone
+ * @len:        number of bytes to copy
+ * @redzone:    BUFLEN bytes of POISON used as the comparison pattern
+ * @fill:       source pattern; the first @len bytes are copied in
+ *
+ * Aborts if COPY_LOOP returns something other than 0 or the destination
+ * pointer, if the copied bytes differ, or if the poison before or after
+ * the destination area was overwritten.
+ */
+static void do_one(char *src, char *dst, unsigned long src_off,
+		   unsigned long dst_off, unsigned long len, void *redzone,
+		   void *fill)
+{
+	char *srcp, *dstp;
+	unsigned long ret;
+	unsigned long i;
+
+	srcp = src + MIN_REDZONE + src_off;
+	dstp = dst + MIN_REDZONE + dst_off;
+
+	memset(src, POISON, BUFLEN);
+	memset(dst, POISON, BUFLEN);
+	memcpy(srcp, fill, len);
+
+	ret = COPY_LOOP(dstp, srcp, len);
+	if (ret && ret != (unsigned long)dstp) {
+		/* len and ret are unsigned long, so print them with %lu. */
+		printf("(%p,%p,%lu) returned %lu\n", dstp, srcp, len, ret);
+		abort();
+	}
+
+	if (memcmp(dstp, srcp, len)) {
+		printf("(%p,%p,%lu) miscompare\n", dstp, srcp, len);
+		printf("src: ");
+		/* Cast to unsigned char so %02x never sees a sign-extended byte. */
+		for (i = 0; i < len; i++)
+			printf("%02x ", (unsigned char)srcp[i]);
+		printf("\ndst: ");
+		for (i = 0; i < len; i++)
+			printf("%02x ", (unsigned char)dstp[i]);
+		printf("\n");
+		abort();
+	}
+
+	/* Everything before the destination must still be poison. */
+	if (memcmp(dst, redzone, dstp - dst)) {
+		printf("(%p,%p,%lu) redzone before corrupted\n",
+		       dstp, srcp, len);
+		abort();
+	}
+
+	/* Everything after the copied bytes must still be poison. */
+	if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
+		printf("(%p,%p,%lu) redzone after corrupted\n",
+		       dstp, srcp, len);
+		abort();
+	}
+}
+
+/*
+ * Exercise COPY_LOOP for every length from 1 to MAX_LEN - 1 and every
+ * combination of source and destination offsets below MAX_OFFSET.
+ * Returns 0; any verification failure aborts inside do_one().
+ * The buffers allocated here are not freed.
+ */
+int test_copy_loop(void)
+{
+	char *src, *dst, *redzone, *fill;
+	unsigned long len, src_off, dst_off;
+	unsigned long i;
+
+	/*
+	 * NOTE(review): BUFLEN (8192 + 16 + 2 * 128 = 8464) is not a power
+	 * of two, while memalign() is documented to take a power-of-two
+	 * alignment. Confirm the resulting alignment is what is intended.
+	 */
+	src = memalign(BUFLEN, BUFLEN);
+	dst = memalign(BUFLEN, BUFLEN);
+	redzone = malloc(BUFLEN);
+	fill = malloc(BUFLEN);
+
+	if (!src || !dst || !redzone || !fill) {
+		fprintf(stderr, "malloc failed\n");
+		exit(1);
+	}
+
+	memset(redzone, POISON, BUFLEN);
+
+	/* Fill with sequential bytes */
+	for (i = 0; i < BUFLEN; i++)
+		fill[i] = i & 0xff;
+
+	/* NOTE(review): len == MAX_LEN itself is not tested. Confirm intent. */
+	for (len = 1; len < MAX_LEN; len++) {
+		for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+			for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+				do_one(src, dst, src_off, dst_off, len,
+				       redzone, fill);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Run test_copy_loop() under the shared harness, named after COPY_LOOP. */
+int main(void)
+{
+	return test_harness(test_copy_loop, str(COPY_LOOP));
+}
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@ -0,0 +1,114 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "subunit.h"
+#include "utils.h"
+
+/* Seconds run_test() waits for the child before sending SIGTERM */
+#define TIMEOUT		120
+/* Further seconds run_test() waits after SIGTERM before sending SIGKILL */
+#define KILL_TIMEOUT	5
+
+
+/*
+ * Run test_function in a forked child that is placed in its own process
+ * group, and wait for it with a timeout driven by SIGALRM.
+ *
+ * The caller is expected to have installed a SIGALRM handler so that the
+ * alarm merely interrupts waitpid() (test_harness() does this).
+ *
+ * If the child has not exited after TIMEOUT seconds, its process group is
+ * sent SIGTERM; if it is still running KILL_TIMEOUT seconds later the group
+ * is sent SIGKILL and 1 is returned without reaping the child.
+ *
+ * Returns the child's exit status if it exited normally, otherwise 1.
+ */
+int run_test(int (test_function)(void), char *name)
+{
+	bool terminated;
+	int rc, status;
+	pid_t pid;
+
+	/* Make sure output is flushed before forking */
+	fflush(stdout);
+
+	pid = fork();
+	if (pid == 0) {
+		setpgid(0, 0);
+		exit(test_function());
+	} else if (pid == -1) {
+		perror("fork");
+		return 1;
+	}
+
+	/* Also set the group from the parent, whichever side runs first */
+	setpgid(pid, pid);
+
+	/* Wake us up in timeout seconds */
+	alarm(TIMEOUT);
+	terminated = false;
+
+wait:
+	rc = waitpid(pid, &status, 0);
+	if (rc == -1) {
+		if (errno != EINTR) {
+			/* Don't leave the timer armed for our caller */
+			alarm(0);
+			printf("unknown error from waitpid\n");
+			return 1;
+		}
+
+		if (terminated) {
+			printf("!! force killing %s\n", name);
+			kill(-pid, SIGKILL);
+			return 1;
+		} else {
+			printf("!! killing %s\n", name);
+			kill(-pid, SIGTERM);
+			terminated = true;
+			alarm(KILL_TIMEOUT);
+			goto wait;
+		}
+	}
+
+	/*
+	 * The child has been reaped, cancel the pending alarm so a late
+	 * SIGALRM cannot interrupt whatever the caller does next.
+	 */
+	alarm(0);
+
+	/* Kill anything else in the process group that is still running */
+	kill(-pid, SIGTERM);
+
+	if (WIFEXITED(status))
+		status = WEXITSTATUS(status);
+	else {
+		if (WIFSIGNALED(status))
+			printf("!! child died by signal %d\n", WTERMSIG(status));
+		else
+			printf("!! child died by unknown cause\n");
+
+		status = 1; /* Signal or other */
+	}
+
+	return status;
+}
+
+/* SIGALRM handler: deliberately empty, its only job is to interrupt waitpid */
+static void alarm_handler(int signum)
+{
+	/* Just wake us up from waitpid */
+}
+
+/* Action installed for SIGALRM by test_harness(); all other fields are zero */
+static struct sigaction alarm_action = {
+	.sa_handler = alarm_handler,
+};
+
+/*
+ * Entry point used by each test's main(): announce the test, install the
+ * SIGALRM handler that run_test() relies on, run the test and report the
+ * outcome.
+ *
+ * Returns 1 if the signal handler cannot be installed, otherwise the value
+ * returned by run_test().
+ */
+int test_harness(int (test_function)(void), char *name)
+{
+	int result;
+
+	test_start(name);
+	test_set_git_version(GIT_VERSION);
+
+	if (sigaction(SIGALRM, &alarm_action, NULL) != 0) {
+		perror("sigaction");
+		test_error(name);
+		return 1;
+	}
+
+	result = run_test(test_function, name);
+
+	/* A magic return value means "skipped" rather than pass/fail */
+	if (result != MAGIC_SKIP_RETURN_VALUE)
+		test_finish(name, result);
+	else
+		test_skip(name);
+
+	return result;
+}
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@ -0,0 +1,18 @@
+# First target, so it is what a bare "make" runs: build from the parent dir
+noarg:
+	$(MAKE) -C ../
+
+PROGS := hugetlb_vs_thp_test
+
+all: $(PROGS)
+
+# Each program is linked together with the shared test harness source
+$(PROGS): ../harness.c
+
+# Run every program in turn; the leading '-' makes make ignore a failure
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS)
+
+.PHONY: all run_tests clean
--- a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+/* This must match the huge page & THP size */
+#define SIZE	(16 * 1024 * 1024)
+
+/*
+ * One iteration of the test: try a MAP_HUGETLB mapping at a fixed hint
+ * address, drop it, then map normal anonymous memory using the same hint and
+ * touch it.
+ *
+ * Returns 0 on success, 1 if the second mmap fails.
+ */
+static int test_body(void)
+{
+	void *addr;
+	char *p;
+
+	addr = (void *)0xa0000000;
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p != MAP_FAILED) {
+		/*
+		 * Typically the mmap will fail because no huge pages are
+		 * allocated on the system. But if there are huge pages
+		 * allocated the mmap will succeed. That's fine too, we just
+		 * munmap here before continuing.
+		 *
+		 * Unmap what mmap actually returned: without MAP_FIXED the
+		 * address is only a hint.
+		 */
+		munmap(p, SIZE);
+	}
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p == MAP_FAILED) {
+		printf("Mapping failed @ %p\n", addr);
+		perror("mmap");
+		return 1;
+	}
+
+	/*
+	 * Either a user or kernel access is sufficient to trigger the bug.
+	 * A kernel access is easier to spot & debug, as it will trigger the
+	 * softlockup or RCU stall detectors, and when the system is kicked
+	 * into xmon we get a backtrace in the kernel.
+	 *
+	 * A good option is:
+	 *  getcwd(p, SIZE);
+	 *
+	 * For the purposes of this testcase it's preferable to spin in
+	 * userspace, so the harness can kill us if we get stuck. That way we
+	 * see a test failure rather than a dead system.
+	 */
+	*p = 0xf;
+
+	/* Again release the region we were really given */
+	munmap(p, SIZE);
+
+	return 0;
+}
+
+/*
+ * Repeat test_body() 10,000 times - a "bunch" that still completes reasonably
+ * quickly. Returns 1 as soon as an iteration fails, 0 if they all pass.
+ */
+static int test_main(void)
+{
+	int remaining = 10000;
+
+	while (remaining-- > 0) {
+		if (test_body() != 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Run test_main via test_harness() under the name "hugetlb_vs_thp". */
+int main(void)
+{
+	return test_harness(test_main, "hugetlb_vs_thp");
+}
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@ -0,0 +1,38 @@
+# First target, so it is what a bare "make" runs: build from the parent dir
+noarg:
+	$(MAKE) -C ../
+
+PROGS := count_instructions l3_bank_test per_event_excludes
+# Sources linked into every program in PROGS
+EXTRA_SOURCES := ../harness.c event.c lib.c
+
+# Sub-directories that are built, run and cleaned by recursive make
+SUB_TARGETS = ebb
+
+all: $(PROGS) $(SUB_TARGETS)
+
+$(PROGS): $(EXTRA_SOURCES)
+
+# loop.S can only be built 64-bit
+count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+	$(CC) $(CFLAGS) -m64 -o $@ $^
+
+# Run the local programs (a failure is ignored because of the leading '-')
+# after the sub-directory tests
+run_tests: all sub_run_tests
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean: sub_clean
+	rm -f $(PROGS) loop.o
+
+# -k: keep building the remaining sub-directory targets after an error
+$(SUB_TARGETS):
+	$(MAKE) -k -C $@ all
+
+sub_run_tests: all
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET run_tests; \
+	done;
+
+sub_clean:
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+
+.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
--- a/tools/testing/selftests/powerpc/pmu/count_instructions.c
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@ -0,0 +1,147 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop(u64 loops);
+
+/*
+ * Initialise e as a PERF_TYPE_HARDWARE event for the given config and name.
+ * The event starts disabled and excludes kernel, hypervisor and idle.
+ */
+static void setup_event(struct event *e, u64 config, char *name)
+{
+	event_init_opts(e, config, PERF_TYPE_HARDWARE, name);
+
+	e->attr.disabled = 1;
+	e->attr.exclude_kernel = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_idle = 1;
+}
+
+/*
+ * Enable the task's perf events, run thirty_two_instruction_loop() for
+ * (instructions / 32) iterations, disable the events again and compare the
+ * value read from events[0] with (instructions + overhead).
+ *
+ * events:        events[0] and events[1] are read, optionally reported, and
+ *                reset; the value read stays in events[n].result
+ * instructions:  number of instructions the loop is asked to execute
+ * overhead:      instructions expected on top of the loop itself
+ * report:        print the events and the comparison when true
+ *
+ * Returns 0 if the measured count is within 0.0001% of the expected one,
+ * -1 otherwise.
+ */
+static int do_count_loop(struct event *events, u64 instructions,
+			 u64 overhead, bool report)
+{
+	s64 difference, expected;
+	double percentage;
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* The loop body is 32 instructions, so divide the request by 32 */
+	thirty_two_instruction_loop(instructions >> 5);
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	event_read(&events[0]);
+	event_read(&events[1]);
+
+	expected = instructions + overhead;
+	difference = events[0].result.value - expected;
+	percentage = (double)difference / events[0].result.value * 100;
+
+	if (report) {
+		event_report(&events[0]);
+		event_report(&events[1]);
+
+		printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+		/* expected has the same signed type as difference */
+		printf("Expected %lld\n", expected);
+		printf("Actual   %llu\n", events[0].result.value);
+		printf("Delta    %lld, %f%%\n", difference, percentage);
+	}
+
+	event_reset(&events[0]);
+	event_reset(&events[1]);
+
+	if (difference < 0)
+		difference = -difference;
+
+	/*
+	 * A zero count would make the integer division below divide by zero.
+	 * It only matches if nothing was expected either.
+	 */
+	if (events[0].result.value == 0)
+		return difference ? -1 : 0;
+
+	/* Tolerate a difference below 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / events[0].result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+/*
+ * Measure the instruction count of a zero-length loop 101 times and return
+ * the smallest value seen in events[0].
+ */
+static u64 determine_overhead(struct event *events)
+{
+	u64 best, sample;
+	int remaining;
+
+	/* The first measurement seeds the minimum */
+	do_count_loop(events, 0, 0, false);
+	best = events[0].result.value;
+
+	for (remaining = 100; remaining > 0; remaining--) {
+		do_count_loop(events, 0, 0, false);
+		sample = events[0].result.value;
+		if (sample >= best)
+			continue;
+
+		printf("Replacing overhead %llu with %llu\n", best, sample);
+		best = sample;
+	}
+
+	return best;
+}
+
+/*
+ * Open an "instructions" event with a "cycles" event grouped under it,
+ * determine the overhead of an empty loop, then check the instruction count
+ * for increasingly long loops.
+ *
+ * Returns 0 on success and -1 if an event cannot be opened; a failed count
+ * check returns through FAIL_IF().
+ */
+static int test_body(void)
+{
+	struct event events[2];
+	u64 overhead;
+
+	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (event_open(&events[0])) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[1], events[0].fd)) {
+		perror("perf_event_open");
+		/* Don't leave the group leader open on this error path */
+		event_close(&events[0]);
+		return -1;
+	}
+
+	overhead = determine_overhead(events);
+	printf("Overhead of null loop: %llu instructions\n", overhead);
+
+	/* Run for 1 million instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+	/* Run for 10 million instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+	/* Run for 100 million instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+	/* Run for 1 billion instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+	/* Run for 16 billion instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+	/* Run for 64 billion instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+	event_close(&events[0]);
+	event_close(&events[1]);
+
+	return 0;
+}
+
+/* Test entry point: hands test_body to eat_cpu() and returns its result. */
+static int count_instructions(void)
+{
+	return eat_cpu(test_body);
+}
+
+/* Run count_instructions via test_harness(). */
+int main(void)
+{
+	return test_harness(count_instructions, "count_instructions");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@ -0,0 +1,33 @@
+# First target, so it is what a bare "make" runs: build from two levels up
+noarg:
+	$(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+PROGS := reg_access_test event_attributes_test cycles_test	\
+	 cycles_with_freeze_test pmc56_overflow_test		\
+	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
+	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
+	 task_event_pinned_vs_ebb_test multi_ebb_procs_test	\
+	 multi_counter_test pmae_handling_test			\
+	 close_clears_pmcc_test instruction_count_test		\
+	 fork_cleanup_test ebb_on_child_test			\
+	 ebb_on_willing_child_test back_to_back_ebbs_test	\
+	 lost_exception_test no_handler_test			\
+	 cycles_with_mmcr2_test
+
+all: $(PROGS)
+
+# Sources every test program is built from in addition to its own .c file
+$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
+
+instruction_count_test: ../loop.S
+
+lost_exception_test: ../lib.c
+
+# Run every program in turn; the leading '-' makes make ignore a failure
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS)
+
+# These targets name actions, not files (matches the sibling Makefiles)
+.PHONY: all run_tests clean
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS	50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+/*
+ * EBB callback installed with setup_ebb_handler() by back_to_back_ebbs().
+ *
+ * Counts the EBB (or records it as spurious when BESCR[PMEO] is clear), then
+ * re-arms EBBs and reads a few SPRs into the trace. Once NUMBER_OF_EBBS have
+ * been counted, the reset only clears MMCR0[PMAO].
+ */
+static void ebb_callee(void)
+{
+	uint64_t siar, val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	/* Resets the PMC */
+	count_pmc(1, sample_period);
+
+out:
+	if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+		/* Reset but leave counters frozen */
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		/* Unfreezes */
+		reset_ebb();
+
+	/* Do some stuff to chew some cycles and pop the counter */
+	siar = mfspr(SPRN_SIAR);
+	trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+	val = mfspr(SPRN_PMC1);
+	trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+/*
+ * Test body: open a user-only "cycles" EBB event, program PMC1 with a sample
+ * period of 5 and spin in core_busy_loop() until NUMBER_OF_EBBS EBBs have
+ * been counted.
+ *
+ * Returns 0 if exactly NUMBER_OF_EBBS EBBs were taken; any failed check
+ * returns through FAIL_IF().
+ */
+int back_to_back_ebbs(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Deliberately tiny, see the comment at the top of this file */
+	sample_period = 5;
+
+	/* Program PMC1 while the counters are frozen, then let them run */
+	ebb_freeze_pmcs();
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	ebb_global_enable();
+	ebb_unfreeze_pmcs();
+
+	while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+	return 0;
+}
+
+/* Run back_to_back_ebbs via test_harness(). */
+int main(void)
+{
+	return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+/*
+ * core_busy_loop: load known values into r3-r11 and r14-r31, keep a copy of
+ * most of them in memory below the stack pointer, spin briefly, then check
+ * that neither the registers nor the memory copies have changed.
+ *
+ * r3 is 0 on return if every check passed. It is set to 1 before the register
+ * checks; during the reload phase it holds the value reloaded from -96(%r1).
+ * NOTE(review): a failure in the reload phase therefore returns whatever was
+ * reloaded into r3 rather than 1 - confirm callers only test for non-zero.
+ */
+FUNC_START(core_busy_loop)
+	/* Allocate a 168 byte frame and save r14-r31 in it */
+	stdu	%r1, -168(%r1)
+	std	r14, 160(%r1)
+	std	r15, 152(%r1)
+	std	r16, 144(%r1)
+	std	r17, 136(%r1)
+	std	r18, 128(%r1)
+	std	r19, 120(%r1)
+	std	r20, 112(%r1)
+	std	r21, 104(%r1)
+	std	r22, 96(%r1)
+	std	r23, 88(%r1)
+	std	r24, 80(%r1)
+	std	r25, 72(%r1)
+	std	r26, 64(%r1)
+	std	r27, 56(%r1)
+	std	r28, 48(%r1)
+	std	r29, 40(%r1)
+	std	r30, 32(%r1)
+	std	r31, 24(%r1)
+
+	/*
+	 * Give each register a distinct value and store a copy at a negative
+	 * offset from r1. r30 and r31 are loaded but no copy is stored.
+	 */
+	li	 r3, 0x3030
+	std	 r3, -96(%r1)
+	li	 r4, 0x4040
+	std	 r4, -104(%r1)
+	li	 r5, 0x5050
+	std	 r5, -112(%r1)
+	li	 r6, 0x6060
+	std	 r6, -120(%r1)
+	li	 r7, 0x7070
+	std	 r7, -128(%r1)
+	li	 r8, 0x0808
+	std	 r8, -136(%r1)
+	li	 r9, 0x0909
+	std	 r9, -144(%r1)
+	li	r10, 0x1010
+	std	r10, -152(%r1)
+	li	r11, 0x1111
+	std	r11, -160(%r1)
+	li	r14, 0x1414
+	std	r14, -168(%r1)
+	li	r15, 0x1515
+	std	r15, -176(%r1)
+	li	r16, 0x1616
+	std	r16, -184(%r1)
+	li	r17, 0x1717
+	std	r17, -192(%r1)
+	li	r18, 0x1818
+	std	r18, -200(%r1)
+	li	r19, 0x1919
+	std	r19, -208(%r1)
+	li	r20, 0x2020
+	std	r20, -216(%r1)
+	li	r21, 0x2121
+	std	r21, -224(%r1)
+	li	r22, 0x2222
+	std	r22, -232(%r1)
+	li	r23, 0x2323
+	std	r23, -240(%r1)
+	li	r24, 0x2424
+	std	r24, -248(%r1)
+	li	r25, 0x2525
+	std	r25, -256(%r1)
+	li	r26, 0x2626
+	std	r26, -264(%r1)
+	li	r27, 0x2727
+	std	r27, -272(%r1)
+	li	r28, 0x2828
+	std	r28, -280(%r1)
+	li	r29, 0x2929
+	std	r29, -288(%r1)
+	li	r30, 0x3030
+	li	r31, 0x3131
+
+	/* Busy loop: count r3 from 0 up to 100 */
+	li	r3, 0
+0:	addi	r3, r3, 1
+	cmpwi	r3, 100
+	blt	0b
+
+	/* Return 1 (fail) unless we get through all the checks */
+	li	r3, 1
+
+	/* Check none of our registers have been corrupted */
+	cmpwi	r4,  0x4040
+	bne	1f
+	cmpwi	r5,  0x5050
+	bne	1f
+	cmpwi	r6,  0x6060
+	bne	1f
+	cmpwi	r7,  0x7070
+	bne	1f
+	cmpwi	r8,  0x0808
+	bne	1f
+	cmpwi	r9,  0x0909
+	bne	1f
+	cmpwi	r10, 0x1010
+	bne	1f
+	cmpwi	r11, 0x1111
+	bne	1f
+	cmpwi	r14, 0x1414
+	bne	1f
+	cmpwi	r15, 0x1515
+	bne	1f
+	cmpwi	r16, 0x1616
+	bne	1f
+	cmpwi	r17, 0x1717
+	bne	1f
+	cmpwi	r18, 0x1818
+	bne	1f
+	cmpwi	r19, 0x1919
+	bne	1f
+	cmpwi	r20, 0x2020
+	bne	1f
+	cmpwi	r21, 0x2121
+	bne	1f
+	cmpwi	r22, 0x2222
+	bne	1f
+	cmpwi	r23, 0x2323
+	bne	1f
+	cmpwi	r24, 0x2424
+	bne	1f
+	cmpwi	r25, 0x2525
+	bne	1f
+	cmpwi	r26, 0x2626
+	bne	1f
+	cmpwi	r27, 0x2727
+	bne	1f
+	cmpwi	r28, 0x2828
+	bne	1f
+	cmpwi	r29, 0x2929
+	bne	1f
+	cmpwi	r30, 0x3030
+	bne	1f
+	cmpwi	r31, 0x3131
+	bne	1f
+
+	/* Load junk into all our registers before we reload them from the stack. */
+	li	r3,  0xde
+	li	r4,  0xad
+	li	r5,  0xbe
+	li	r6,  0xef
+	li	r7,  0xde
+	li	r8,  0xad
+	li	r9,  0xbe
+	li	r10, 0xef
+	li	r11, 0xde
+	li	r14, 0xad
+	li	r15, 0xbe
+	li	r16, 0xef
+	li	r17, 0xde
+	li	r18, 0xad
+	li	r19, 0xbe
+	li	r20, 0xef
+	li	r21, 0xde
+	li	r22, 0xad
+	li	r23, 0xbe
+	li	r24, 0xef
+	li	r25, 0xde
+	li	r26, 0xad
+	li	r27, 0xbe
+	li	r28, 0xef
+	li	r29, 0xdd
+
+	/* Reload each stored copy and check it still holds the original value */
+	ld	r3,	-96(%r1)
+	cmpwi	r3,  0x3030
+	bne	1f
+	ld	r4,	-104(%r1)
+	cmpwi	r4,  0x4040
+	bne	1f
+	ld	r5,	-112(%r1)
+	cmpwi	r5,  0x5050
+	bne	1f
+	ld	r6,	-120(%r1)
+	cmpwi	r6,  0x6060
+	bne	1f
+	ld	r7,	-128(%r1)
+	cmpwi	r7,  0x7070
+	bne	1f
+	ld	r8,	-136(%r1)
+	cmpwi	r8,  0x0808
+	bne	1f
+	ld	r9,	-144(%r1)
+	cmpwi	r9,  0x0909
+	bne	1f
+	ld	r10, -152(%r1)
+	cmpwi	r10, 0x1010
+	bne	1f
+	ld	r11, -160(%r1)
+	cmpwi	r11, 0x1111
+	bne	1f
+	ld	r14, -168(%r1)
+	cmpwi	r14, 0x1414
+	bne	1f
+	ld	r15, -176(%r1)
+	cmpwi	r15, 0x1515
+	bne	1f
+	ld	r16, -184(%r1)
+	cmpwi	r16, 0x1616
+	bne	1f
+	ld	r17, -192(%r1)
+	cmpwi	r17, 0x1717
+	bne	1f
+	ld	r18, -200(%r1)
+	cmpwi	r18, 0x1818
+	bne	1f
+	ld	r19, -208(%r1)
+	cmpwi	r19, 0x1919
+	bne	1f
+	ld	r20, -216(%r1)
+	cmpwi	r20, 0x2020
+	bne	1f
+	ld	r21, -224(%r1)
+	cmpwi	r21, 0x2121
+	bne	1f
+	ld	r22, -232(%r1)
+	cmpwi	r22, 0x2222
+	bne	1f
+	ld	r23, -240(%r1)
+	cmpwi	r23, 0x2323
+	bne	1f
+	ld	r24, -248(%r1)
+	cmpwi	r24, 0x2424
+	bne	1f
+	ld	r25, -256(%r1)
+	cmpwi	r25, 0x2525
+	bne	1f
+	ld	r26, -264(%r1)
+	cmpwi	r26, 0x2626
+	bne	1f
+	ld	r27, -272(%r1)
+	cmpwi	r27, 0x2727
+	bne	1f
+	ld	r28, -280(%r1)
+	cmpwi	r28, 0x2828
+	bne	1f
+	ld	r29, -288(%r1)
+	cmpwi	r29, 0x2929
+	bne	1f
+
+	/* Load 0 (success) to return */
+	li	r3, 0
+
+	/* Common exit: restore r14-r31, pop the frame and return */
+1:	ld	r14, 160(%r1)
+	ld	r15, 152(%r1)
+	ld	r16, 144(%r1)
+	ld	r17, 136(%r1)
+	ld	r18, 128(%r1)
+	ld	r19, 120(%r1)
+	ld	r20, 112(%r1)
+	ld	r21, 104(%r1)
+	ld	r22, 96(%r1)
+	ld	r23, 88(%r1)
+	ld	r24, 80(%r1)
+	ld	r25, 72(%r1)
+	ld	r26, 64(%r1)
+	ld	r27, 56(%r1)
+	ld	r28, 48(%r1)
+	ld	r29, 40(%r1)
+	ld	r30, 32(%r1)
+	ld	r31, 24(%r1)
+	addi	%r1, %r1, 168
+	blr
--- a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
@ -0,0 +1,59 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+/*
+ * Test body: take at least one EBB on a "cycles" event, close the event and
+ * then check via catch_sigill(write_pmc1) that writing PMC1 is no longer
+ * allowed.
+ *
+ * Returns 0 on success; any failed check returns through FAIL_IF().
+ */
+int close_clears_pmcc(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Spin until the handler has counted one EBB */
+	while (ebb_state.stats.ebb_count < 1)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* The real test is here, do we take a SIGILL when writing PMU regs now
+	 * that we have closed the event. We expect that we will. */
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We should still be able to read EBB regs though */
+	mfspr(SPRN_EBBHR);
+	mfspr(SPRN_EBBRR);
+	mfspr(SPRN_BESCR);
+
+	return 0;
+}
+
+/* Run close_clears_pmcc via test_harness(). */
+int main(void)
+{
+	return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+/*
+ * Initialise, open on the given cpu and enable a pinned PM_RUN_INST_CMPL
+ * event that excludes kernel, hypervisor and idle.
+ *
+ * Returns 0 on success; otherwise returns through SKIP_IF()/FAIL_IF().
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Test body: bind to an online cpu, fork a child that runs ebb_child(), set
+ * up a pinned cpu event in the parent, and then let the child try to use its
+ * EBB event.
+ *
+ * The child is expected to finish with status 2, and the cpu event is
+ * expected to have counted and to have run for as long as it was enabled.
+ *
+ * Returns 0 on success, the result of setup_cpu_event() if that fails;
+ * other failed checks return through FAIL_IF().
+ */
+int cpu_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Never hand -1 to the kill/wait helpers below as if it were a child */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event should have run */
+	FAIL_IF(event.result.value == 0);
+	FAIL_IF(event.result.enabled != event.result.running);
+
+	return 0;
+}
+
+/* Run cpu_event_pinned_vs_ebb via test_harness(). */
+int main(void)
+{
+	return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@ -0,0 +1,89 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+/*
+ * Initialise, open on the given cpu and enable a PM_RUN_INST_CMPL event that
+ * excludes kernel, hypervisor and idle.
+ *
+ * Returns 0 on success; otherwise returns through SKIP_IF()/FAIL_IF().
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Test body: bind to an online cpu, fork a child that runs ebb_child(), set
+ * up a cpu event in the parent, and then let the child use its EBB event.
+ *
+ * The child is expected to succeed; the cpu event is read and reported but
+ * its value is not checked.
+ *
+ * Returns 0 on success, the result of setup_cpu_event() if that fails;
+ * other failed checks return through FAIL_IF().
+ */
+int cpu_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Never hand -1 to the kill/wait helpers below as if it were a child */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect the child to succeed */
+	FAIL_IF(wait_for_child(pid));
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run */
+
+	return 0;
+}
+
+/* Run cpu_event_vs_ebb via test_harness(). */
+int main(void)
+{
+	return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+/*
+ * Returns 0 if at least one EBB was taken and ebb_check_count() accepts the
+ * PMC1 count with a fudge of 100; any failed check returns through FAIL_IF().
+ */
+int cycles(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Spin until 10 EBBs are counted, checking MMCR0 after every pass */
+	while (ebb_state.stats.ebb_count < 10) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+	return 0;
+}
+
+/* Run cycles via test_harness(). */
+int main(void)
+{
+	return test_harness(cycles, "cycles");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+/* Set by the main loop around its MMCR0[FC] writes, read by ebb_callee() */
+static bool counters_frozen = false;
+/* Number of EBBs ebb_callee() handled while counters_frozen was true */
+static int ebbs_while_frozen = 0;
+
+/*
+ * EBB callback installed with setup_ebb_handler() by cycles_with_freeze().
+ *
+ * Counts the EBB (or records it as spurious when BESCR[PMEO] is clear) and
+ * re-arms EBBs. MMCR0[FC] is cleared on exit unless the main loop has marked
+ * the counters as frozen, in which case FC is left as it is.
+ */
+static void ebb_callee(void)
+{
+	uint64_t mask, val;
+
+	mask = MMCR0_PMAO | MMCR0_FC;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+	if (counters_frozen) {
+		trace_log_string(ebb_state.trace, "frozen");
+		ebbs_while_frozen++;
+		/* Don't clear FC, the main loop wants the counters frozen */
+		mask &= ~MMCR0_FC;
+	}
+
+	count_pmc(1, sample_period);
+out:
+	reset_ebb_with_clear_mask(mask);
+}
+
+/*
+ * Test body: count cycles with EBBs while the main loop repeatedly clears
+ * MMCR0[FC] around core_busy_loop() and sets it again afterwards.
+ *
+ * Returns 0 if at least one EBB was taken and FC was never found clear right
+ * after the loop set it; any failed check returns through FAIL_IF().
+ */
+int cycles_with_freeze(void)
+{
+	struct event event;
+	uint64_t val;
+	bool fc_cleared;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	fc_cleared = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		/* Update the flag the handler reads before unfreezing */
+		counters_frozen = false;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+		FAIL_IF(core_busy_loop());
+
+		/* Update the flag the handler reads before freezing */
+		counters_frozen = true;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+		/* FC was just set, so finding it clear here is the failure */
+		val = mfspr(SPRN_MMCR0);
+		if (! (val & MMCR0_FC)) {
+			printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+			fc_cleared = true;
+		}
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(fc_cleared);
+
+	return 0;
+}
+
+/* Run cycles_with_freeze via test_harness(). */
+int main(void)
+{
+	return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first freezes PMC1 and so we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* (     FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+/*
+ * Test body: count cycles with EBBs while alternately writing the two
+ * expected MMCR2 values and checking that each one reads back unchanged
+ * after core_busy_loop().
+ *
+ * Returns 0 if at least one EBB was taken and MMCR2 always read back as
+ * written; any failed check returns through FAIL_IF().
+ */
+int cycles_with_mmcr2(void)
+{
+	struct event event;
+	uint64_t val, expected[2], actual;
+	int i;
+	bool bad_mmcr2;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* XXX Set of MMCR2 must be after enable */
+	expected[0] = MMCR2_EXPECTED_1;
+	expected[1] = MMCR2_EXPECTED_2;
+	i = 0;
+	bad_mmcr2 = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		/* Alternate between the two expected values */
+		mtspr(SPRN_MMCR2, expected[i % 2]);
+
+		FAIL_IF(core_busy_loop());
+
+		val = mfspr(SPRN_MMCR2);
+		if (val != expected[i % 2]) {
+			bad_mmcr2 = true;
+			/* Keep the offending value for the report below */
+			actual = val;
+		}
+
+		i++;
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* actual is only assigned, and only read, when bad_mmcr2 is set */
+	if (bad_mmcr2)
+		printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+	FAIL_IF(bad_mmcr2);
+
+	return 0;
+}
+
+/* Run cycles_with_mmcr2 via test_harness(). */
+int main(void)
+{
+	return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@ -0,0 +1,478 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "reg.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+/*
+ * C-level hook called from the assembly EBB handler: forwards to the callee
+ * registered in ebb_user_func, or does nothing when none is registered.
+ */
+void ebb_hook(void)
+{
+	if (!ebb_user_func)
+		return;
+
+	ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+/*
+ * Re-arm EBB delivery at the end of a handler.
+ *
+ * @mmcr0_clear_mask: MMCR0 bits to clear; MMCR0_PMAE is always set.
+ *
+ * The step numbers below follow the sequence the original author numbered;
+ * step 6 (rfebb 1) is left to the caller.
+ */
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+	u64 mmcr0;
+
+	/*
+	 * 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this
+	 * 3) set MMCR0[PMAE]   - docs say BESCR[PME] should do this
+	 */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	mmcr0 &= ~mmcr0_clear_mask;
+	mmcr0 |= MMCR0_PMAE;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	/* 4) clear BESCR[PMEO] */
+	mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+	/* 5) set BESCR[PME] */
+	mtspr(SPRN_BESCRS, BESCR_PME);
+
+	/* 6) rfebb 1 - done in our caller */
+}
+
+/* Standard re-arm: clear both MMCR0[PMAO] and MMCR0[FC] before re-enabling. */
+void reset_ebb(void)
+{
+	const unsigned long clear_mask = MMCR0_PMAO | MMCR0_FC;
+
+	reset_ebb_with_clear_mask(clear_mask);
+}
+
+/*
+ * Called outside of the EBB handler to check MMCR0 is sane.
+ *
+ * Returns 1 (after printing MMCR0) when FC is set without PMAO, else 0.
+ */
+int ebb_check_mmcr0(void)
+{
+	u64 mmcr0 = mfspr(SPRN_MMCR0);
+
+	/* It's OK if we see FC & PMAO, but not FC by itself */
+	if ((mmcr0 & (MMCR0_FC | MMCR0_PMAO)) != MMCR0_FC)
+		return 0;
+
+	printf("Outside of loop, only FC set 0x%llx\n", mmcr0);
+	return 1;
+}
+
+/*
+ * Check that the accumulated count for @pmc lies within
+ * ebb_count * (sample_period -/+ fudge).
+ *
+ * Prints the count and the violated (or both) limits, and returns true only
+ * when the count is inside the window.
+ */
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+	const u64 count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+	const u64 lower = ebb_state.stats.ebb_count * (sample_period - fudge);
+	const u64 upper = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+	if (count < lower) {
+		printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+			pmc, count, lower, lower - count);
+		return false;
+	}
+
+	if (count > upper) {
+		printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+			pmc, count, upper, count - upper);
+		return false;
+	}
+
+	printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+		pmc, count, lower, upper, count - lower, upper - count);
+
+	return true;
+}
+
+/*
+ * Default EBB callee. An invocation without BESCR[PMEO] set is counted as
+ * spurious. Otherwise the EBB is counted and traced, every enabled PMC is
+ * sampled via count_pmc(), and no_overflow is bumped when none of them
+ * reported an overflow. EBBs are re-armed with reset_ebb() in both cases.
+ */
+void standard_ebb_callee(void)
+{
+	int pmc, overflowed = 0;
+	u64 bescr, mmcr0;
+
+	bescr = mfspr(SPRN_BESCR);
+	if (bescr & BESCR_PMEO) {
+		ebb_state.stats.ebb_count++;
+		trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+		mmcr0 = mfspr(SPRN_MMCR0);
+		trace_log_reg(ebb_state.trace, SPRN_MMCR0, mmcr0);
+
+		for (pmc = 1; pmc <= 6; pmc++) {
+			if (!ebb_state.pmc_enable[PMC_INDEX(pmc)])
+				continue;
+
+			overflowed += count_pmc(pmc, sample_period);
+		}
+
+		if (overflowed == 0)
+			ebb_state.stats.no_overflow++;
+	} else {
+		ebb_state.stats.spurious++;
+	}
+
+	reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+/*
+ * Register @callee as the function ebb_hook() forwards to and point EBBHR at
+ * the assembly entry point ebb_handler. The handler address is printed.
+ */
+void setup_ebb_handler(void (*callee)(void))
+{
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	/* The symbol itself is used as the entry address */
+	const u64 entry = (u64)ebb_handler;
+#else
+	/* The symbol is read as a descriptor whose first word is the entry */
+	struct opd {
+		u64 entry;
+		u64 toc;
+	};
+	const u64 entry = ((struct opd *)ebb_handler)->entry;
+#endif
+
+	printf("EBB Handler is at %#llx\n", entry);
+
+	ebb_user_func = callee;
+
+	/* Ensure ebb_user_func is set before we set the handler */
+	mb();
+	mtspr(SPRN_EBBHR, entry);
+
+	/* Make sure the handler is set before we return */
+	mb();
+}
+
+/* Zero every counter in ebb_state.stats; pmc_enable and trace are untouched. */
+void clear_ebb_stats(void)
+{
+	memset(&ebb_state.stats, 0, sizeof ebb_state.stats);
+}
+
+/* Print the software-side EBB statistics kept in ebb_state.stats. */
+void dump_summary_ebb_state(void)
+{
+	printf("ebb_state:\n"
+	       "  ebb_count    = %d\n"
+	       "  spurious     = %d\n"
+	       "  negative     = %d\n"
+	       "  no_overflow  = %d\n"
+	       "  pmc[1] count = 0x%llx\n"
+	       "  pmc[2] count = 0x%llx\n"
+	       "  pmc[3] count = 0x%llx\n"
+	       "  pmc[4] count = 0x%llx\n"
+	       "  pmc[5] count = 0x%llx\n"
+	       "  pmc[6] count = 0x%llx\n",
+	       ebb_state.stats.ebb_count,
+	       ebb_state.stats.spurious,
+	       ebb_state.stats.negative,
+	       ebb_state.stats.no_overflow,
+	       ebb_state.stats.pmc_count[0],
+	       ebb_state.stats.pmc_count[1],
+	       ebb_state.stats.pmc_count[2],
+	       ebb_state.stats.pmc_count[3],
+	       ebb_state.stats.pmc_count[4],
+	       ebb_state.stats.pmc_count[5]);
+}
+
+/*
+ * Render the MMCR0 bits the EBB tests care about as text: bit 31 -> "FC",
+ * bit 26 -> "PMAE", bit 7 -> "PMAO", each followed by a space.
+ *
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+static char *decode_mmcr0(u32 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	/* Unsigned constants: 1 << 31 would overflow a signed int */
+	if (value & (1u << 31))
+		strcat(buf, "FC ");
+	if (value & (1u << 26))
+		strcat(buf, "PMAE ");
+	if (value & (1u << 7))
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+/*
+ * Render the BESCR bits the EBB tests care about as text: bit 63 -> "GE",
+ * bit 32 -> "PME", bit 0 -> "PMEO", each followed by a space. The names
+ * match the BESCR_PME / BESCR_PMEO usage elsewhere in this file (the
+ * PMAE/PMAO names belong to MMCR0).
+ *
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+static char *decode_bescr(u64 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1ull << 63))
+		strcat(buf, "GE ");
+	if (value & (1ull << 32))
+		strcat(buf, "PME ");
+	if (value & 1)
+		strcat(buf, "PMEO ");
+
+	return buf;
+}
+
+/*
+ * Print the PMU/EBB registers as currently seen by this thread. MMCR0 and
+ * BESCR are sampled first so that their decoded flag strings match the
+ * printed values; MMCR0 is held in a u32 here.
+ */
+void dump_ebb_hw_state(void)
+{
+	u32 mmcr0 = mfspr(SPRN_MMCR0);
+	u64 bescr = mfspr(SPRN_BESCR);
+
+	printf("HW state:\n"
+	       "MMCR0 0x%016x %s\n"
+	       "MMCR2 0x%016lx\n"
+	       "EBBHR 0x%016lx\n"
+	       "BESCR 0x%016llx %s\n"
+	       "PMC1  0x%016lx\n"
+	       "PMC2  0x%016lx\n"
+	       "PMC3  0x%016lx\n"
+	       "PMC4  0x%016lx\n"
+	       "PMC5  0x%016lx\n"
+	       "PMC6  0x%016lx\n"
+	       "SIAR  0x%016lx\n",
+	       mmcr0, decode_mmcr0(mmcr0),
+	       mfspr(SPRN_MMCR2),
+	       mfspr(SPRN_EBBHR),
+	       bescr, decode_bescr(bescr),
+	       mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+	       mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+	       mfspr(SPRN_SIAR));
+}
+
+/* Full dump: software statistics, then hardware registers, then the trace. */
+void dump_ebb_state(void)
+{
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+	trace_buffer_print(ebb_state.trace);
+}
+
+/*
+ * Sample PMC @pmc, account for what it counted, and reload it.
+ *
+ * The counter is expected to have started at pmc_sample_period(sample_period).
+ * A reading below that start value bumps stats.negative; otherwise the
+ * distance from the start value is added to stats.pmc_count. The raw reading
+ * is traced and the PMC is rewritten with the start value.
+ *
+ * Returns non-zero when the raw reading had reached COUNTER_OVERFLOW.
+ */
+int count_pmc(int pmc, uint32_t sample_period)
+{
+	const uint32_t start_value = pmc_sample_period(sample_period);
+	/* 0) Read PMC */
+	const u64 val = read_pmc(pmc);
+
+	if (val >= start_value)
+		ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value;
+	else
+		ebb_state.stats.negative++;
+
+	trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val);
+
+	/* 1) Reset PMC */
+	write_pmc(pmc, start_value);
+
+	/* Report if we overflowed */
+	return val >= COUNTER_OVERFLOW;
+}
+
+/*
+ * Enable event @e with PERF_EVENT_IOC_ENABLE and then read it once.
+ *
+ * Returns the ioctl() result if that is non-zero, otherwise the result of
+ * event_read(). Barriers bracket the sequence so that surrounding SPR
+ * accesses stay ordered against it.
+ */
+int ebb_event_enable(struct event *e)
+{
+	int rc;
+
+	/* Ensure any SPR writes are ordered vs us */
+	mb();
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+	if (rc == 0) {
+		rc = event_read(e);
+
+		/* Ditto */
+		mb();
+	}
+
+	return rc;
+}
+
+/* Set MMCR0[FC], leaving the other MMCR0 bits as read, then barrier. */
+void ebb_freeze_pmcs(void)
+{
+	u64 mmcr0 = mfspr(SPRN_MMCR0);
+
+	mtspr(SPRN_MMCR0, mmcr0 | MMCR0_FC);
+	mb();
+}
+
+/* Clear MMCR0[FC] so the counters run again, then barrier. */
+void ebb_unfreeze_pmcs(void)
+{
+	u64 mmcr0 = mfspr(SPRN_MMCR0);
+
+	/* Unfreeze counters */
+	mtspr(SPRN_MMCR0, mmcr0 & ~MMCR0_FC);
+	mb();
+}
+
+/* Write BESCR with bits 63 and 32 set, then barrier. */
+void ebb_global_enable(void)
+{
+	/* Enable EBBs globally and PMU EBBs */
+	const u64 bescr = (1ull << 63) | (1ull << 32);
+
+	mtspr(SPRN_BESCR, bescr);
+	mb();
+}
+
+/* Write BESCR_PME to the BESCRR register, then barrier. */
+void ebb_global_disable(void)
+{
+	const u64 bits = BESCR_PME;
+
+	/* Disable EBBs & freeze counters, events are still scheduled */
+	mtspr(SPRN_BESCRR, bits);
+	mb();
+}
+
+/* Mark @e as an EBB event by setting bit 63 of its config word. */
+void event_ebb_init(struct event *e)
+{
+	const u64 ebb_flag = 1ull << 63;
+
+	e->attr.config = e->attr.config | ebb_flag;
+}
+
+/* Set config bit 62 on @e and OR @ifm into config starting at bit 60. */
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+	const u64 bhrb_flag = 1ull << 62;
+	const u64 ifm_bits = (u64)ifm << 60;
+
+	e->attr.config |= bhrb_flag | ifm_bits;
+}
+
+/* Configure @e as an EBB group leader: EBB flag plus exclusive and pinned. */
+void event_leader_ebb_init(struct event *e)
+{
+	event_ebb_init(e);
+
+	e->attr.pinned = 1;
+	e->attr.exclusive = 1;
+}
+
+/*
+ * Child-side body shared by the "EBB vs other event" tests.
+ *
+ * Waits for the parent, opens and enables a "cycles" EBB event on PMC1,
+ * handshakes with the parent over the two pipes, then spins in
+ * core_busy_loop() until 20 EBBs have been taken.
+ *
+ * Returns 0 on success, 2 when the first event_read() fails (the parent is
+ * notified of the error in that case); other failures go through FAIL_IF().
+ */
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+	struct event event;
+	uint64_t val;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(event_enable(&event));
+
+	if (event_read(&event)) {
+		/*
+		 * Some tests expect to fail here, so don't report an error on
+		 * this line, and return a distinguishable error code. Tell the
+		 * parent an error happened.
+		 */
+		notify_parent_of_error(write_pipe);
+		return 2;
+	}
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Tell the parent the EBB event is installed, then wait to be released */
+	FAIL_IF(notify_parent(write_pipe));
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+
+		/* To try and hit SIGILL case */
+		/* val is only a sink for the reads; it is never used afterwards */
+		val  = mfspr(SPRN_MMCRA);
+		val |= mfspr(SPRN_MMCR2);
+		val |= mfspr(SPRN_MMCR0);
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Fold whatever PMC1 counted since the last EBB into the stats */
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/* Jump target used to unwind from sigill_handler() back into catch_sigill() */
+static sigjmp_buf setjmp_env;
+
+/*
+ * SIGILL handler: report the signal and jump back to catch_sigill().
+ * siglongjmp() restores the signal mask saved by sigsetjmp(), so SIGILL is
+ * not left blocked once we have left the handler.
+ */
+static void sigill_handler(int signal)
+{
+	printf("Took sigill\n");
+	siglongjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+	.sa_handler = sigill_handler,
+};
+
+/*
+ * Run @func and check that it raises SIGILL.
+ *
+ * Returns 0 when SIGILL was taken while running @func, and 1 when @func
+ * returned normally or the handler could not be installed. The SIGILL
+ * handler stays installed after the call.
+ */
+int catch_sigill(void (*func)(void))
+{
+	if (sigaction(SIGILL, &sigill_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	/* Save the signal mask too, so a later SIGILL can still be caught */
+	if (sigsetjmp(setjmp_env, 1) == 0) {
+		func();
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Write zero to PMC1; argument-less so it can be handed to catch_sigill(). */
+void write_pmc1(void)
+{
+	const u64 zero = 0;
+
+	mtspr(SPRN_PMC1, zero);
+}
+
+/* Write @value to PMC number @pmc (1-6); any other @pmc is ignored. */
+void write_pmc(int pmc, u64 value)
+{
+	switch (pmc) {
+	case 1:
+		mtspr(SPRN_PMC1, value);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, value);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, value);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, value);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, value);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, value);
+		break;
+	default:
+		/* Unknown PMC number: nothing to write */
+		break;
+	}
+}
+
+/* Return the current value of PMC number @pmc (1-6), or 0 for any other @pmc. */
+u64 read_pmc(int pmc)
+{
+	u64 val = 0;
+
+	switch (pmc) {
+	case 1:
+		val = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		val = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		val = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		val = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		val = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		val = mfspr(SPRN_PMC6);
+		break;
+	default:
+		break;
+	}
+
+	return val;
+}
+
+/* SIGTERM handler: dump the EBB statistics and registers, then abort(). */
+static void term_handler(int signal)
+{
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+
+	abort();
+}
+
+/* Installed for SIGTERM by ebb_init() */
+struct sigaction term_action = {
+	.sa_handler = term_handler,
+};
+
+/*
+ * Constructor: clear the statistics, install the SIGTERM dump handler
+ * (a failure is only reported via perror) and allocate a 1 MiB trace buffer.
+ */
+static void __attribute__((constructor)) ebb_init(void)
+{
+	int rc;
+
+	clear_ebb_stats();
+
+	rc = sigaction(SIGTERM, &term_action, NULL);
+	if (rc != 0)
+		perror("sigaction");
+
+	ebb_state.trace = trace_buffer_allocate(1024 * 1024);
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@ -0,0 +1,77 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc)	((pmc)-1)
+
+#define NUM_PMC_VALUES	128
+
+/* Software-side state shared between the EBB handler and the test body. */
+struct ebb_state
+{
+	struct {
+		/* Accumulated counts per PMC, indexed with PMC_INDEX(pmc) */
+		u64 pmc_count[6];
+		/* EBBs taken with BESCR[PMEO] set; polled by the test loops */
+		volatile int ebb_count;
+		/* Callee invocations where BESCR[PMEO] was not set */
+		int spurious;
+		/* count_pmc() readings that were below the start value */
+		int negative;
+		/* EBBs where no enabled PMC reported an overflow */
+		int no_overflow;
+	} stats;
+
+	/* Which PMCs standard_ebb_callee() samples, indexed with PMC_INDEX(pmc) */
+	bool pmc_enable[6];
+	/* Trace buffer the handler logs into; printed by dump_ebb_state() */
+	struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+/*
+ * Return COUNTER_OVERFLOW - @value, truncated to 32 bits: the start value
+ * from which a counter reaches COUNTER_OVERFLOW after @value more counts.
+ */
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+	uint32_t start = (uint32_t)(COUNTER_OVERFLOW - value);
+
+	return start;
+}
+
+/* Flag PMC @pmc (1-based) so that standard_ebb_callee() samples it. */
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+	const int idx = PMC_INDEX(pmc);
+
+	ebb_state.pmc_enable[idx] = true;
+}
+
+/* Helpers implemented in ebb.c (each function is declared exactly once) */
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+/* Sample period used by the tests when programming a PMC */
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB   .long 0x4c000924
+
+/* Stack layout:
+ *
+ *                   ^
+ *  User stack       |
+ *  Back chain ------+	<- r1		<-------+
+ *  ...						|
+ *  Red zone / ABI Gap				|
+ *  ...						|
+ *  vr63	<+				|
+ *  vr0		 |				|
+ *  VSCR	 |				|
+ *  FPSCR	 |				|
+ *  r31		 | Save area			|
+ *  r0		 |				|
+ *  XER		 |				|
+ *  CTR		 |				|
+ *  LR		 |				|
+ *  CCR		<+				|
+ *  ...		<+				|
+ *  LR		 | Caller frame			|
+ *  CCR		 |				|
+ *  Back chain	<+	<- updated r1	--------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP		512
+#else
+#define ABIGAP		288
+#endif
+
+#define NR_GPR		32
+#define NR_SPR		6
+#define NR_VSR		64
+
+#define SAVE_AREA	((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME	112
+
+#define STACK_FRAME	(ABIGAP + SAVE_AREA + CALLER_FRAME)
+
+#define CCR_SAVE	(CALLER_FRAME)
+#define LR_SAVE		(CCR_SAVE + 8)
+#define CTR_SAVE	(LR_SAVE  + 8)
+#define XER_SAVE	(CTR_SAVE + 8)
+#define GPR_SAVE(n)	(XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE	(GPR_SAVE(31) + 8)
+#define VSCR_SAVE	(FSCR_SAVE + 8)
+#define VSR_SAVE(n)	(VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n)	std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n)	ld  n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n)	lis n,0xaaaa
+
+#define SAVE_VSR(n, b)	li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b)	li b, VSR_SAVE(n); lxvd2x  n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr)	\
+	lis     reg,(expr)@highest;	\
+	ori     reg,reg,(expr)@higher;	\
+	rldicr  reg,reg,32,31;		\
+	oris    reg,reg,(expr)@h;	\
+	ori     reg,reg,(expr)@l;
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+	.type FUNC_NAME(name),@function; \
+	.globl FUNC_NAME(name); \
+	FUNC_NAME(name):
+
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer using our entry point */	\
+	LOAD_REG_IMMEDIATE(r12, name)				\
+0:	addis	r2,r12,(.TOC.-0b)@ha;				\
+	addi	r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer via our opd entry */	\
+	LOAD_REG_IMMEDIATE(r2, name)			\
+	ld      r2,8(r2);
+#endif
+
+    .text
+
+/*
+ * EBB entry point (its address is what setup_ebb_handler() writes to EBBHR).
+ *
+ * Allocates STACK_FRAME bytes below r1, saves r0, LR, CR, CTR, XER, r2-r31,
+ * the value read by mffs, the VSCR and VSR0-63 into the save area, overwrites
+ * r2-r12 and r14-r31 with TRASH_GPR, reloads the TOC pointer and calls
+ * ebb_hook(). Afterwards everything saved is restored, the frame is popped
+ * and control returns through RFEBB.
+ *
+ * NOTE(review): mfvscr/mtvscr are given f0 as operand. Confirm whether the
+ * assembler treats that as vector register 0 rather than FP register 0; if
+ * so the stfd/lfd of f0 around them would not be moving the VSCR value.
+ */
+ENTRY_POINT(ebb_handler)
+    stdu    r1,-STACK_FRAME(r1)
+    /* r0 is saved first because it is the scratch register for the SPRs */
+    SAVE_GPR(0)
+    mflr    r0
+    std     r0,LR_SAVE(r1)
+    mfcr    r0
+    std     r0,CCR_SAVE(r1)
+    mfctr   r0
+    std     r0,CTR_SAVE(r1)
+    mfxer   r0
+    std     r0,XER_SAVE(r1)
+    SAVE_GPR(2)
+    SAVE_GPR(3)
+    SAVE_GPR(4)
+    SAVE_GPR(5)
+    SAVE_GPR(6)
+    SAVE_GPR(7)
+    SAVE_GPR(8)
+    SAVE_GPR(9)
+    SAVE_GPR(10)
+    SAVE_GPR(11)
+    SAVE_GPR(12)
+    SAVE_GPR(13)
+    SAVE_GPR(14)
+    SAVE_GPR(15)
+    SAVE_GPR(16)
+    SAVE_GPR(17)
+    SAVE_GPR(18)
+    SAVE_GPR(19)
+    SAVE_GPR(20)
+    SAVE_GPR(21)
+    SAVE_GPR(22)
+    SAVE_GPR(23)
+    SAVE_GPR(24)
+    SAVE_GPR(25)
+    SAVE_GPR(26)
+    SAVE_GPR(27)
+    SAVE_GPR(28)
+    SAVE_GPR(29)
+    SAVE_GPR(30)
+    SAVE_GPR(31)
+    /* r3 is already saved above, so it serves as the index register below */
+    /* VSR0 is saved before f0 is reused as scratch for mffs/mfvscr */
+    SAVE_VSR(0, r3)
+    mffs     f0
+    stfd     f0, FSCR_SAVE(r1)
+    mfvscr   f0
+    stfd     f0, VSCR_SAVE(r1)
+    SAVE_VSR(1,  r3)
+    SAVE_VSR(2,  r3)
+    SAVE_VSR(3,  r3)
+    SAVE_VSR(4,  r3)
+    SAVE_VSR(5,  r3)
+    SAVE_VSR(6,  r3)
+    SAVE_VSR(7,  r3)
+    SAVE_VSR(8,  r3)
+    SAVE_VSR(9,  r3)
+    SAVE_VSR(10, r3)
+    SAVE_VSR(11, r3)
+    SAVE_VSR(12, r3)
+    SAVE_VSR(13, r3)
+    SAVE_VSR(14, r3)
+    SAVE_VSR(15, r3)
+    SAVE_VSR(16, r3)
+    SAVE_VSR(17, r3)
+    SAVE_VSR(18, r3)
+    SAVE_VSR(19, r3)
+    SAVE_VSR(20, r3)
+    SAVE_VSR(21, r3)
+    SAVE_VSR(22, r3)
+    SAVE_VSR(23, r3)
+    SAVE_VSR(24, r3)
+    SAVE_VSR(25, r3)
+    SAVE_VSR(26, r3)
+    SAVE_VSR(27, r3)
+    SAVE_VSR(28, r3)
+    SAVE_VSR(29, r3)
+    SAVE_VSR(30, r3)
+    SAVE_VSR(31, r3)
+    SAVE_VSR(32, r3)
+    SAVE_VSR(33, r3)
+    SAVE_VSR(34, r3)
+    SAVE_VSR(35, r3)
+    SAVE_VSR(36, r3)
+    SAVE_VSR(37, r3)
+    SAVE_VSR(38, r3)
+    SAVE_VSR(39, r3)
+    SAVE_VSR(40, r3)
+    SAVE_VSR(41, r3)
+    SAVE_VSR(42, r3)
+    SAVE_VSR(43, r3)
+    SAVE_VSR(44, r3)
+    SAVE_VSR(45, r3)
+    SAVE_VSR(46, r3)
+    SAVE_VSR(47, r3)
+    SAVE_VSR(48, r3)
+    SAVE_VSR(49, r3)
+    SAVE_VSR(50, r3)
+    SAVE_VSR(51, r3)
+    SAVE_VSR(52, r3)
+    SAVE_VSR(53, r3)
+    SAVE_VSR(54, r3)
+    SAVE_VSR(55, r3)
+    SAVE_VSR(56, r3)
+    SAVE_VSR(57, r3)
+    SAVE_VSR(58, r3)
+    SAVE_VSR(59, r3)
+    SAVE_VSR(60, r3)
+    SAVE_VSR(61, r3)
+    SAVE_VSR(62, r3)
+    SAVE_VSR(63, r3)
+
+    /* Overwrite the saved GPRs so a missed restore shows up; r13 is skipped */
+    TRASH_GPR(2)
+    TRASH_GPR(3)
+    TRASH_GPR(4)
+    TRASH_GPR(5)
+    TRASH_GPR(6)
+    TRASH_GPR(7)
+    TRASH_GPR(8)
+    TRASH_GPR(9)
+    TRASH_GPR(10)
+    TRASH_GPR(11)
+    TRASH_GPR(12)
+    TRASH_GPR(14)
+    TRASH_GPR(15)
+    TRASH_GPR(16)
+    TRASH_GPR(17)
+    TRASH_GPR(18)
+    TRASH_GPR(19)
+    TRASH_GPR(20)
+    TRASH_GPR(21)
+    TRASH_GPR(22)
+    TRASH_GPR(23)
+    TRASH_GPR(24)
+    TRASH_GPR(25)
+    TRASH_GPR(26)
+    TRASH_GPR(27)
+    TRASH_GPR(28)
+    TRASH_GPR(29)
+    TRASH_GPR(30)
+    TRASH_GPR(31)
+
+    /* r2 was just trashed; rebuild the TOC pointer before calling into C */
+    RESTORE_TOC(ebb_handler)
+
+    /*
+     * r13 is our TLS pointer. We leave whatever value was in there when the
+     * EBB fired. That seems to be OK because once set the TLS pointer is not
+     * changed - but presumably that could change in future.
+     */
+
+    bl      ebb_hook
+    nop
+
+    /* r2 may be changed here but we don't care */
+
+    /* Restore in roughly the reverse order of the save sequence above */
+    lfd      f0, FSCR_SAVE(r1)
+    mtfsf    0xff,f0
+    lfd      f0, VSCR_SAVE(r1)
+    mtvscr   f0
+    LOAD_VSR(0, r3)
+    LOAD_VSR(1,  r3)
+    LOAD_VSR(2,  r3)
+    LOAD_VSR(3,  r3)
+    LOAD_VSR(4,  r3)
+    LOAD_VSR(5,  r3)
+    LOAD_VSR(6,  r3)
+    LOAD_VSR(7,  r3)
+    LOAD_VSR(8,  r3)
+    LOAD_VSR(9,  r3)
+    LOAD_VSR(10, r3)
+    LOAD_VSR(11, r3)
+    LOAD_VSR(12, r3)
+    LOAD_VSR(13, r3)
+    LOAD_VSR(14, r3)
+    LOAD_VSR(15, r3)
+    LOAD_VSR(16, r3)
+    LOAD_VSR(17, r3)
+    LOAD_VSR(18, r3)
+    LOAD_VSR(19, r3)
+    LOAD_VSR(20, r3)
+    LOAD_VSR(21, r3)
+    LOAD_VSR(22, r3)
+    LOAD_VSR(23, r3)
+    LOAD_VSR(24, r3)
+    LOAD_VSR(25, r3)
+    LOAD_VSR(26, r3)
+    LOAD_VSR(27, r3)
+    LOAD_VSR(28, r3)
+    LOAD_VSR(29, r3)
+    LOAD_VSR(30, r3)
+    LOAD_VSR(31, r3)
+    LOAD_VSR(32, r3)
+    LOAD_VSR(33, r3)
+    LOAD_VSR(34, r3)
+    LOAD_VSR(35, r3)
+    LOAD_VSR(36, r3)
+    LOAD_VSR(37, r3)
+    LOAD_VSR(38, r3)
+    LOAD_VSR(39, r3)
+    LOAD_VSR(40, r3)
+    LOAD_VSR(41, r3)
+    LOAD_VSR(42, r3)
+    LOAD_VSR(43, r3)
+    LOAD_VSR(44, r3)
+    LOAD_VSR(45, r3)
+    LOAD_VSR(46, r3)
+    LOAD_VSR(47, r3)
+    LOAD_VSR(48, r3)
+    LOAD_VSR(49, r3)
+    LOAD_VSR(50, r3)
+    LOAD_VSR(51, r3)
+    LOAD_VSR(52, r3)
+    LOAD_VSR(53, r3)
+    LOAD_VSR(54, r3)
+    LOAD_VSR(55, r3)
+    LOAD_VSR(56, r3)
+    LOAD_VSR(57, r3)
+    LOAD_VSR(58, r3)
+    LOAD_VSR(59, r3)
+    LOAD_VSR(60, r3)
+    LOAD_VSR(61, r3)
+    LOAD_VSR(62, r3)
+    LOAD_VSR(63, r3)
+
+    /* SPRs go back through r0, which is itself restored straight after */
+    ld      r0,XER_SAVE(r1)
+    mtxer   r0
+    ld      r0,CTR_SAVE(r1)
+    mtctr   r0
+    ld      r0,LR_SAVE(r1)
+    mtlr    r0
+    ld      r0,CCR_SAVE(r1)
+    mtcr    r0
+    REST_GPR(0)
+    REST_GPR(2)
+    REST_GPR(3)
+    REST_GPR(4)
+    REST_GPR(5)
+    REST_GPR(6)
+    REST_GPR(7)
+    REST_GPR(8)
+    REST_GPR(9)
+    REST_GPR(10)
+    REST_GPR(11)
+    REST_GPR(12)
+    REST_GPR(13)
+    REST_GPR(14)
+    REST_GPR(15)
+    REST_GPR(16)
+    REST_GPR(17)
+    REST_GPR(18)
+    REST_GPR(19)
+    REST_GPR(20)
+    REST_GPR(21)
+    REST_GPR(22)
+    REST_GPR(23)
+    REST_GPR(24)
+    REST_GPR(25)
+    REST_GPR(26)
+    REST_GPR(27)
+    REST_GPR(28)
+    REST_GPR(29)
+    REST_GPR(30)
+    REST_GPR(31)
+    /* Pop the frame allocated by the stdu at entry */
+    addi    r1,r1,STACK_FRAME
+    RFEBB
+FUNC_END(ebb_handler)
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. Nothing interesting happens, because
+ * even though the event is enabled and running the child hasn't enabled the
+ * actual delivery of the EBBs.
+ */
+
+/*
+ * Child side: handshake with the parent twice (before and after it creates
+ * the EBB event), write PMC1, spin for a while and exit with 0.
+ */
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	int spins;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Parent creates EBB event */
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Check the EBB is enabled by writing PMC1 */
+	write_pmc1();
+
+	/* EBB event is enabled here */
+	spins = 0;
+	while (spins < 1000000)
+		spins++;
+
+	return 0;
+}
+
+/*
+ * Fork a child, attach a "cycles" EBB leader event to it from the parent,
+ * and check the child still exits cleanly.
+ *
+ * Returns 0 on success; failures are reported through FAIL_IF().
+ */
+int ebb_on_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Without a child nobody would answer on the pipes, so fail right away */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child should just exit happily */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run ebb_on_child() under the selftest harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(ebb_on_child, "ebb_on_child");
+
+	return rc;
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. The child expects this and enables
+ * EBBs, which are then delivered to the child, even though the event is
+ * created by the parent.
+ */
+
+/*
+ * Child side: once released by the parent, install the EBB handler and
+ * enable EBBs, tell the parent, then spin until 20 EBBs have been taken.
+ * Returns 0 on success; failures are reported through FAIL_IF().
+ */
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	/* Setup our EBB handler, before the EBB event is created */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(notify_parent(write_pipe));
+
+	for (; ebb_state.stats.ebb_count < 20; ) {
+		FAIL_IF(core_busy_loop());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	/* Account for whatever PMC1 counted since the last EBB */
+	count_pmc(1, sample_period);
+	dump_ebb_state();
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/*
+ * Tests we can setup an EBB on our child - if it's expecting it.
+ *
+ * The child installs its handler first; the parent then opens and enables
+ * the "cycles" EBB leader event on the child's pid and waits for it to exit.
+ * Returns 0 on success; failures are reported through FAIL_IF().
+ */
+int ebb_on_willing_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Without a child nobody would answer on the pipes, so fail right away */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to setup its EBB handler */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Child should now take EBBs and then exit */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run ebb_on_willing_child() under the selftest harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+
+	return rc;
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+/*
+ * Initialise @event as PM_RUN_INST_CMPL (0x400FA), then open it on @cpu and
+ * enable it. Returns 0 on success; SKIP_IF()/FAIL_IF() handle the other
+ * outcomes.
+ */
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_idle = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_kernel = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+/*
+ * Bind to one CPU, let a forked child install an EBB event there, then open
+ * a cpu-wide event on the same CPU and check how it was scheduled once the
+ * child has finished.
+ *
+ * Returns 0 on success, the setup_cpu_event() result when that fails (the
+ * child is killed first); other failures are reported through FAIL_IF().
+ */
+int ebb_vs_cpu_event(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/* Without a child nobody would answer on the pipes, so fail right away */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Now try to install our CPU event */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* .. and wait for it to complete */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run, but we don't expect 100% */
+	FAIL_IF(event.result.enabled >= event.result.running);
+
+	return 0;
+}
+
+/* Run ebb_vs_cpu_event() under the selftest harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+
+	return rc;
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
@ -0,0 +1,131 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test various attributes of the EBB event are enforced.
+ *
+ * Each stanza initialises an event, tweaks one attribute and checks that
+ * opening it succeeds or fails as stated in its comment. Only events that
+ * were opened successfully are closed. Returns 0 when every stanza behaves
+ * as expected.
+ */
+int event_attributes(void)
+{
+	struct event event, leader;
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to succeed */
+	FAIL_IF(event_open(&event));
+	event_close(&event);
+
+
+	event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+	event_leader_ebb_init(&event);
+	/* Expected to fail, no PMC specified */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+	event.attr.exclusive = 0;
+	/* Expected to fail, not exclusive */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x3001e);
+	event_leader_ebb_init(&event);
+	event.attr.freq = 1;
+	/* Expected to fail, sets freq */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x4001e);
+	event_leader_ebb_init(&event);
+	event.attr.sample_period = 1;
+	/* Expected to fail, sets sample_period */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.enable_on_exec = 1;
+	/* Expected to fail, sets enable_on_exec */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.inherit = 1;
+	/* Expected to fail, sets inherit */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	/* Group cases: an EBB leader with an EBB sibling */
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to succeed */
+	FAIL_IF(event_open_with_group(&event, leader.fd));
+	event_close(&leader);
+	event_close(&event);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+
+	/* Expected to fail, event doesn't request EBB, leader does */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	/* Clear the EBB flag */
+	leader.attr.config &= ~(1ull << 63);
+
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to fail, leader doesn't request EBB */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.exclusive = 0;
+	/* Expected to fail, leader isn't exclusive */
+	FAIL_IF(event_open(&leader) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.pinned = 0;
+	/* Expected to fail, leader isn't pinned */
+	FAIL_IF(event_open(&leader) == 0);
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to fail, not a task event */
+	/* This last case opens the event on CPU 0 instead of on a task */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+	return 0;
+}
+
+/* Run event_attributes() under the selftest harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(event_attributes, "event_attributes");
+
+	return rc;
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(thirty_two_instruction_loop)
+	cmpwi	r3,0
+	beqlr
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
--- a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
@ -0,0 +1,79 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+static struct event event;
+
+/*
+ * Runs in the forked child: checks that BESCR, EBBHR and EBBRR read as zero,
+ * that writing PMC1 raises SIGILL, and that the event opened by the parent
+ * can still be read. Returns 0 when all checks pass.
+ */
+static int child(void)
+{
+	/* Even though we have EBE=0 we can still see the EBB regs */
+	FAIL_IF(mfspr(SPRN_BESCR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+	/* catch_sigill() returns 0 only when write_pmc1() took a SIGILL */
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We can still read from the event, though it is on our parent */
+	FAIL_IF(event_read(&event));
+
+	return 0;
+}
+
+/*
+ * Tests that fork clears EBB state.
+ *
+ * The parent sets up a "cycles" EBB leader event with the handler installed
+ * and EBBs enabled, freezes the counters, then forks; the checks themselves
+ * run in child(). Returns 0 on success; failures are reported through
+ * FAIL_IF().
+ */
+int fork_cleanup(void)
+{
+	pid_t pid;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Don't need to actually take any EBBs */
+
+	pid = fork();
+	/* No child means nothing was tested: report that explicitly */
+	FAIL_IF(pid == -1);
+	if (pid == 0)
+		exit(child());
+
+	/* Child does the actual testing */
+	FAIL_IF(wait_for_child(pid));
+
+	/* After fork */
+	event_close(&event);
+
+	return 0;
+}
+
+/* Run fork_cleanup() under the selftest harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(fork_cleanup, "fork_cleanup");
+
+	return rc;
+}
--- a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@ -0,0 +1,164 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+static bool counters_frozen = true;
+
+/*
+ * Run the calibrated loop with MMCR0_FC cleared, then compare the PMC4
+ * count against the expected number of instructions.
+ *
+ * event:        receives the measured count in event->result.value
+ * instructions: requested loop length; the loop routine is called with
+ *               instructions >> 5 (it is a 32 instruction loop)
+ * overhead:     instructions expected in addition to the loop itself
+ * report:       when true, print expected/actual/error figures
+ *
+ * Returns 0 when the measured count is within 0.0001 % of
+ * instructions + overhead, -1 otherwise.
+ */
+static int do_count_loop(struct event *event, uint64_t instructions,
+			 uint64_t overhead, bool report)
+{
+	int64_t difference, expected;
+	double percentage;
+
+	clear_ebb_stats();
+
+	/* Publish the flag read by pmc4_ebb_callee() before clearing FC */
+	counters_frozen = false;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+	thirty_two_instruction_loop(instructions >> 5);
+
+	/* Same ordering on the way out: flag first, then set FC again */
+	counters_frozen = true;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+	/*
+	 * NOTE(review): presumably folds whatever is left in PMC4 into
+	 * ebb_state.stats.pmc_count - confirm against ebb.h.
+	 */
+	count_pmc(4, sample_period);
+
+	event->result.value = ebb_state.stats.pmc_count[4-1];
+	expected = instructions + overhead;
+	difference = event->result.value - expected;
+	percentage = (double)difference / event->result.value * 100;
+
+	if (report) {
+		printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+		/* expected is signed, so print it the same way as difference */
+		printf("Expected %ld\n", expected);
+		printf("Actual   %llu\n", event->result.value);
+		printf("Error    %ld, %f%%\n", difference, percentage);
+		printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+	}
+
+	if (difference < 0)
+		difference = -difference;
+
+	/*
+	 * A zero count cannot be used as a divisor below. It is only
+	 * acceptable when nothing was expected either.
+	 */
+	if (event->result.value == 0)
+		return difference ? -1 : 0;
+
+	/* Tolerate a difference of up to 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / event->result.value)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Measure the cost of a null loop: run do_count_loop() with zero
+ * instructions once, then 100 more times, and return the smallest count
+ * seen. Each time a lower value is found it is reported on stdout.
+ */
+static uint64_t determine_overhead(struct event *event)
+{
+	uint64_t lowest, sample;
+	int remaining;
+
+	/* The first run seeds the minimum */
+	do_count_loop(event, 0, 0, false);
+	lowest = event->result.value;
+
+	for (remaining = 100; remaining > 0; remaining--) {
+		do_count_loop(event, 0, 0, false);
+		sample = event->result.value;
+
+		/* Only a strictly smaller sample replaces the minimum */
+		if (sample >= lowest)
+			continue;
+
+		printf("Replacing overhead %lu with %lu\n", lowest, sample);
+		lowest = sample;
+	}
+
+	return lowest;
+}
+
+/*
+ * EBB handler installed by instruction_count() via setup_ebb_handler().
+ *
+ * If BESCR has the PMEO bit set, the EBB is counted and PMC4 is
+ * accumulated; otherwise it is recorded as spurious. In both cases the EBB
+ * is then reset, clearing MMCR0_PMAO as well when counters_frozen is set.
+ */
+static void pmc4_ebb_callee(void)
+{
+	uint64_t bescr;
+
+	bescr = mfspr(SPRN_BESCR);
+
+	if (bescr & BESCR_PMEO) {
+		ebb_state.stats.ebb_count++;
+		count_pmc(4, sample_period);
+	} else {
+		ebb_state.stats.spurious++;
+	}
+
+	/* The reset variant depends on whether do_count_loop() is counting */
+	if (!counters_frozen)
+		reset_ebb();
+	else
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+}
+
+/*
+ * Test entry point: count instructions with EBBs on PMC4 and check that the
+ * counts for a series of calibrated loops are within tolerance.
+ *
+ * Opens an EBB event named "PM_RUN_INST_CMPL", installs pmc4_ebb_callee()
+ * as the handler, measures the null-loop overhead, then runs
+ * do_count_loop() for increasing instruction counts.
+ *
+ * Returns 0 when every loop passes.
+ * NOTE(review): FAIL_IF() is presumably the harness macro that returns
+ * non-zero from this function when its argument is true.
+ */
+int instruction_count(void)
+{
+	struct event event;
+	uint64_t overhead;
+
+	event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+	event_leader_ebb_init(&event);
+	/* Set the exclude_kernel, exclude_hv and exclude_idle attributes */
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* pmc4_ebb_callee() and do_count_loop() pass this to count_pmc() */
+	sample_period = COUNTER_OVERFLOW;
+
+	setup_ebb_handler(pmc4_ebb_callee);
+	/* Clear MMCR0_FC, leaving the other MMCR0 bits as they were */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	ebb_global_enable();
+
+	/* The null-loop cost is added to the expected count of every run */
+	overhead = determine_overhead(&event);
+	printf("Overhead of null loop: %lu instructions\n", overhead);
+
+	/* Run for 1M instructions */
+	FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+	/* Run for 10M instructions */
+	FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+	/* Run for 100M instructions */
+	FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+	/* Run for 1G instructions */
+	FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+	/* Run for 16G instructions */
+	FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+	/* Run for 64G instructions */
+	FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+	/* Run for 128G instructions */
+	FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+	/* Only reached when every loop above passed */
+	ebb_global_disable();
+	event_close(&event);
+
+	printf("Finished OK\n");
+
+	return 0;
+}
+
+/* Program entry: run instruction_count() under the harness and return its result. */
+int main(void)
+{
+	int rc;
+
+	rc = test_harness(instruction_count, "instruction_count");
+
+	return rc;
+}
--- a/Show more
+++ b/Show more
				`@ -0,0 +1 @@`
				`../../../../../arch/powerpc/lib/copyuser_64.S`
				`@ -0,0 +1 @@`
				`../../../../../arch/powerpc/lib/memcpy_power7.S`