Fixed MTP to work with TWRP

awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

tools/virtio/Makefile (new file, 14 lines)
@@ -0,0 +1,14 @@
all: test mod
test: virtio_test vringh_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
CFLAGS += -g -O2 -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
vpath %.c ../../drivers/virtio ../../drivers/vhost
mod:
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
.PHONY: all test mod clean
clean:
${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
vhost_test/Module.symvers vhost_test/modules.order *.d
-include *.d

@@ -0,0 +1,14 @@
#if defined(__i386__) || defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
#define mb() __sync_synchronize()
#define smp_mb() mb()
# define smp_rmb() barrier()
# define smp_wmb() barrier()
/* Weak barriers should be used. If not - it's a bug */
# define rmb() abort()
# define wmb() abort()
#else
#error Please fill in barrier macros
#endif

tools/virtio/linux/bug.h (new file, 10 lines)
@@ -0,0 +1,10 @@
#ifndef BUG_H
#define BUG_H
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
#define BUILD_BUG_ON(x)
#define BUG() abort()
#endif /* BUG_H */

@@ -0,0 +1,2 @@
#ifndef LINUX_DEVICE_H
#endif

tools/virtio/linux/err.h (new file, 26 lines)
@@ -0,0 +1,26 @@
#ifndef ERR_H
#define ERR_H
#define MAX_ERRNO 4095
#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
static inline void * __must_check ERR_PTR(long error)
{
return (void *) error;
}
static inline long __must_check PTR_ERR(const void *ptr)
{
return (long) ptr;
}
static inline long __must_check IS_ERR(const void *ptr)
{
return IS_ERR_VALUE((unsigned long)ptr);
}
static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
{
return !ptr || IS_ERR_VALUE((unsigned long)ptr);
}
#endif /* ERR_H */
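
As a quick orientation, here is a minimal usage sketch of these error-pointer helpers. alloc_buf() and use_buf() are hypothetical illustrations, and the include paths are assumed to be the ones set up by the tools/virtio Makefile above.

#include <errno.h>
#include <stdlib.h>
#include <linux/err.h>

/* Hypothetical helper: return a buffer, or an errno encoded in the pointer. */
static void *alloc_buf(size_t n)
{
	void *p = malloc(n);
	return p ? p : ERR_PTR(-ENOMEM);
}

static int use_buf(void)
{
	void *buf = alloc_buf(64);

	if (IS_ERR(buf))	/* the pointer really carries a negative errno */
		return PTR_ERR(buf);
	free(buf);
	return 0;
}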

@@ -0,0 +1 @@
#include "../../../include/linux/irqreturn.h"

tools/virtio/linux/kernel.h (new file, 105 lines)
@@ -0,0 +1,105 @@
#ifndef KERNEL_H
#define KERNEL_H
#include <stdbool.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdarg.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
#include <errno.h>
#include <unistd.h>
#include <asm/barrier.h>
#define CONFIG_SMP
#define PAGE_SIZE getpagesize()
#define PAGE_MASK (~(PAGE_SIZE-1))
typedef unsigned long long dma_addr_t;
typedef size_t __kernel_size_t;
struct page {
unsigned long long dummy;
};
/* Physical == Virtual */
#define virt_to_phys(p) ((unsigned long)p)
#define phys_to_virt(a) ((void *)(unsigned long)(a))
/* Page address: Virtual / 4K */
#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK))
#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE)
#define __printf(a,b) __attribute__((format(printf,a,b)))
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
static inline void *kmalloc(size_t s, gfp_t gfp)
{
if (__kmalloc_fake)
return __kmalloc_fake;
return malloc(s);
}
static inline void kfree(void *p)
{
if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
return;
free(p);
}
static inline void *krealloc(void *p, size_t s, gfp_t gfp)
{
return realloc(p, s);
}
static inline unsigned long __get_free_page(gfp_t gfp)
{
void *p;
posix_memalign(&p, PAGE_SIZE, PAGE_SIZE);
return (unsigned long)p;
}
static inline void free_page(unsigned long addr)
{
free((void *)addr);
}
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
#define uninitialized_var(x) x = x
# ifndef likely
# define likely(x) (__builtin_expect(!!(x), 1))
# endif
# ifndef unlikely
# define unlikely(x) (__builtin_expect(!!(x), 0))
# endif
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#ifdef DEBUG
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#else
#define pr_debug(format, ...) do {} while (0)
#endif
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#endif /* KERNEL_H */

@@ -0,0 +1,3 @@
static inline void kmemleak_ignore(const void *ptr)
{
}

@@ -0,0 +1,6 @@
#include <linux/export.h>
#define MODULE_LICENSE(__MODULE_LICENSE_value) \
static __attribute__((unused)) const char *__MODULE_LICENSE_name = \
__MODULE_LICENSE_value

@@ -0,0 +1,4 @@
#include "../../../include/linux/kern_levels.h"
#define printk printf
#define vprintk vprintf

@@ -0,0 +1,4 @@
#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0
#define __ratelimit(x) (*(x))

@@ -0,0 +1,189 @@
#ifndef SCATTERLIST_H
#define SCATTERLIST_H
#include <linux/kernel.h>
struct scatterlist {
unsigned long page_link;
unsigned int offset;
unsigned int length;
dma_addr_t dma_address;
};
/* Scatterlist helpers, stolen from linux/scatterlist.h */
#define sg_is_chain(sg) ((sg)->page_link & 0x01)
#define sg_is_last(sg) ((sg)->page_link & 0x02)
#define sg_chain_ptr(sg) \
((struct scatterlist *) ((sg)->page_link & ~0x03))
/**
* sg_assign_page - Assign a given page to an SG entry
* @sg: SG entry
* @page: The page
*
* Description:
* Assign page to sg entry. Also see sg_set_page(), the most commonly used
* variant.
*
**/
static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
{
unsigned long page_link = sg->page_link & 0x3;
/*
* In order for the low bit stealing approach to work, pages
* must be aligned at a 32-bit boundary as a minimum.
*/
BUG_ON((unsigned long) page & 0x03);
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
BUG_ON(sg_is_chain(sg));
#endif
sg->page_link = page_link | (unsigned long) page;
}
/**
* sg_set_page - Set sg entry to point at given page
* @sg: SG entry
* @page: The page
* @len: Length of data
* @offset: Offset into page
*
* Description:
* Use this function to set an sg entry pointing at a page, never assign
* the page directly. We encode sg table information in the lower bits
* of the page pointer. See sg_page() for looking up the page belonging
* to an sg entry.
*
**/
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
unsigned int len, unsigned int offset)
{
sg_assign_page(sg, page);
sg->offset = offset;
sg->length = len;
}
static inline struct page *sg_page(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
BUG_ON(sg_is_chain(sg));
#endif
return (struct page *)((sg)->page_link & ~0x3);
}
/*
* Loop over each sg element, following the pointer to a new list if necessary
*/
#define for_each_sg(sglist, sg, nr, __i) \
for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
/**
* sg_chain - Chain two sglists together
* @prv: First scatterlist
* @prv_nents: Number of entries in prv
* @sgl: Second scatterlist
*
* Description:
* Links @prv@ and @sgl@ together, to form a longer scatterlist.
*
**/
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
/*
* offset and length are unused for chain entry. Clear them.
*/
prv[prv_nents - 1].offset = 0;
prv[prv_nents - 1].length = 0;
/*
* Set lowest bit to indicate a link pointer, and make sure to clear
* the termination bit if it happens to be set.
*/
prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
}
/**
* sg_mark_end - Mark the end of the scatterlist
* @sg: SG entry
*
* Description:
* Marks the passed in sg entry as the termination point for the sg
* table. A call to sg_next() on this entry will return NULL.
*
**/
static inline void sg_mark_end(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
#endif
/*
* Set termination bit, clear potential chain bit
*/
sg->page_link |= 0x02;
sg->page_link &= ~0x01;
}
/**
* sg_unmark_end - Undo setting the end of the scatterlist
* @sg: SG entry
*
* Description:
* Removes the termination marker from the given entry of the scatterlist.
*
**/
static inline void sg_unmark_end(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
#endif
sg->page_link &= ~0x02;
}
static inline struct scatterlist *sg_next(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
#endif
if (sg_is_last(sg))
return NULL;
sg++;
if (unlikely(sg_is_chain(sg)))
sg = sg_chain_ptr(sg);
return sg;
}
static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
memset(sgl, 0, sizeof(*sgl) * nents);
#ifdef CONFIG_DEBUG_SG
{
unsigned int i;
for (i = 0; i < nents; i++)
sgl[i].sg_magic = SG_MAGIC;
}
#endif
sg_mark_end(&sgl[nents - 1]);
}
static inline dma_addr_t sg_phys(struct scatterlist *sg)
{
return page_to_phys(sg_page(sg)) + sg->offset;
}
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
static inline void sg_init_one(struct scatterlist *sg,
const void *buf, unsigned int buflen)
{
sg_init_table(sg, 1);
sg_set_buf(sg, buf, buflen);
}
#endif /* SCATTERLIST_H */
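
As a quick reference, a minimal sketch of how these stubbed helpers fit together; the buffers and main() are illustrative only, and the include paths are assumed to come from the tools/virtio Makefile above.

#include <stdio.h>
#include <linux/scatterlist.h>

int main(void)
{
	char hdr[16], payload[64];
	struct scatterlist sg[2], *cur;
	unsigned int i;

	sg_init_table(sg, 2);		/* zero both entries, mark sg[1] as the end */
	sg_set_buf(&sg[0], hdr, sizeof(hdr));
	sg_set_buf(&sg[1], payload, sizeof(payload));

	for_each_sg(sg, cur, 2, i)	/* walks the list via sg_next() */
		printf("sg[%u]: length=%u offset=%u\n",
		       i, cur->length, cur->offset);
	return 0;
}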

@@ -0,0 +1,2 @@
#ifndef LINUX_SLAB_H
#endif

@@ -0,0 +1,50 @@
#ifndef UACCESS_H
#define UACCESS_H
extern void *__user_addr_min, *__user_addr_max;
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
static inline void __chk_user_ptr(const volatile void *p, size_t size)
{
assert(p >= __user_addr_min && p + size <= __user_addr_max);
}
#define put_user(x, ptr) \
({ \
typeof(ptr) __pu_ptr = (ptr); \
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
ACCESS_ONCE(*(__pu_ptr)) = x; \
0; \
})
#define get_user(x, ptr) \
({ \
typeof(ptr) __pu_ptr = (ptr); \
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
x = ACCESS_ONCE(*(__pu_ptr)); \
0; \
})
static void volatile_memcpy(volatile char *to, const volatile char *from,
unsigned long n)
{
while (n--)
*(to++) = *(from++);
}
static inline int copy_from_user(void *to, const void __user volatile *from,
unsigned long n)
{
__chk_user_ptr(from, n);
volatile_memcpy(to, from, n);
return 0;
}
static inline int copy_to_user(void __user volatile *to, const void *from,
unsigned long n)
{
__chk_user_ptr(to, n);
volatile_memcpy(to, from, n);
return 0;
}
#endif /* UACCESS_H */

tools/virtio/linux/uio.h (new file, 3 lines)
@@ -0,0 +1,3 @@
#include <linux/kernel.h>
#include "../../../include/linux/uio.h"

@@ -0,0 +1,87 @@
#ifndef LINUX_VIRTIO_H
#define LINUX_VIRTIO_H
#include <linux/scatterlist.h>
#include <linux/kernel.h>
/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
#define list_add_tail(a, b) do {} while (0)
#define list_del(a) do {} while (0)
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BITS_PER_BYTE 8
#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
/* TODO: Not atomic as it should be:
* we don't use this for anything important. */
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
*p &= ~mask;
}
static inline int test_bit(int nr, const volatile unsigned long *addr)
{
return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
}
/* end of stubs */
struct virtio_device {
void *dev;
unsigned long features[1];
};
struct virtqueue {
/* TODO: commented as list macros are empty stubs for now.
* Broken but enough for virtio_ring.c
* struct list_head list; */
void (*callback)(struct virtqueue *vq);
const char *name;
struct virtio_device *vdev;
unsigned int index;
unsigned int num_free;
void *priv;
};
/* Interfaces exported by virtio_ring. */
int virtqueue_add_sgs(struct virtqueue *vq,
struct scatterlist *sgs[],
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp);
int virtqueue_add_outbuf(struct virtqueue *vq,
struct scatterlist sg[], unsigned int num,
void *data,
gfp_t gfp);
int virtqueue_add_inbuf(struct virtqueue *vq,
struct scatterlist sg[], unsigned int num,
void *data,
gfp_t gfp);
bool virtqueue_kick(struct virtqueue *vq);
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
void *virtqueue_detach_unused_buf(struct virtqueue *vq);
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
void *pages,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
const char *name);
void vring_del_virtqueue(struct virtqueue *vq);
#endif

@@ -0,0 +1,6 @@
#define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 32
#define virtio_has_feature(dev, feature) \
test_bit((feature), (dev)->features)

@@ -0,0 +1 @@
#include "../../../include/linux/virtio_ring.h"

@@ -0,0 +1 @@
#include "../../../include/linux/vringh.h"

@@ -0,0 +1 @@
#include <sys/uio.h>

@@ -0,0 +1 @@
#include "../../../../include/uapi/linux/virtio_config.h"

@@ -0,0 +1,4 @@
#ifndef VIRTIO_RING_H
#define VIRTIO_RING_H
#include "../../../../include/uapi/linux/virtio_ring.h"
#endif /* VIRTIO_RING_H */

@@ -0,0 +1,2 @@
obj-m += vhost_test.o
EXTRA_CFLAGS += -Idrivers/vhost

@@ -0,0 +1 @@
#include "test.c"

@@ -0,0 +1,13 @@
CC = gcc
CFLAGS = -O2 -Wall -pthread
all: trace-agent
.c.o:
$(CC) $(CFLAGS) -c $^ -o $@
trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
$(CC) $(CFLAGS) -o $@ $^
clean:
rm -f *.o trace-agent

@@ -0,0 +1,118 @@
Trace Agent for virtio-trace
============================
The trace agent is a user-space tool for sending trace data from a guest to a
host with low overhead. The trace agent has the following functions:
- splice a page of the ring-buffer to read_pipe without copying memory
- splice the page from write_pipe to virtio-console without copying memory
- write trace data to stdout when the -o option is given
- be controlled by start/stop orders from the host
The trace agent operates as follows (a sketch of the splice path is shown
after this list):
1) Initialize all structures.
2) Create a read/write thread per CPU and bind each thread to its CPU.
The read/write threads then wait to be woken.
3) A controller thread polls for a start order from the host.
4) After the controller of the trace agent receives a start order from the host,
the controller wakes the read/write threads.
5) The read/write threads read trace data from the ring-buffers and
write the data to virtio-serial.
6) If the controller receives a stop order from the host, the read/write threads
stop reading trace data.
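
For illustration, a minimal sketch of the zero-copy splice path described above
(the first two functions and step 5). trace_fd, port_fd, pipefd and relay_once()
are hypothetical names, not part of the agent; the real code is in
trace-agent-rw.c below.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static ssize_t relay_once(int trace_fd, int port_fd, int pipefd[2], size_t chunk)
{
	/* ring-buffer page -> pipe, without copying through user memory */
	ssize_t n = splice(trace_fd, NULL, pipefd[1], NULL, chunk,
			   SPLICE_F_MOVE | SPLICE_F_MORE);
	if (n <= 0)
		return n;
	/* pipe -> virtio-serial port, again without a user-space copy */
	return splice(pipefd[0], NULL, port_fd, NULL, n,
		      SPLICE_F_MOVE | SPLICE_F_MORE);
}
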
Files
=====
README: this file
Makefile: Makefile of trace agent for virtio-trace
trace-agent.c: includes main function, sets up for operating trace agent
trace-agent.h: includes all structures and some macros
trace-agent-ctl.c: includes controller function for read/write threads
trace-agent-rw.c: includes read/write threads function
Setup
=====
To use this trace agent for virtio-trace, we need to prepare some virtio-serial
interfaces.
1) Make FIFO in a host
virtio-trace uses one virtio-serial pipe per CPU as a trace data path, plus a
control path, so FIFOs (named pipes) should be created as follows:
# mkdir /tmp/virtio-trace/
# mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
# mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
For example, if a guest uses three CPUs, the names are
trace-path-cpu{0,1,2}.{in,out}
and
agent-ctl-path.{in,out}.
2) Set up virtio-serial pipes in the host
Add the following qemu options to use virtio-serial pipes.
##virtio-serial device##
-device virtio-serial-pci,id=virtio-serial0\
##control path##
-chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
-device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
id=channel0,name=agent-ctl-path\
##data path##
-chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
-device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
id=channel1,name=trace-path-cpu0\
...
If you manage guests with libvirt, add the following tags to the domain XML files.
libvirt then passes the equivalent command-line options to qemu.
<channel type='pipe'>
<source path='/tmp/virtio-trace/agent-ctl-path'/>
<target type='virtio' name='agent-ctl-path'/>
<address type='virtio-serial' controller='0' bus='0' port='0'/>
</channel>
<channel type='pipe'>
<source path='/tmp/virtio-trace/trace-path-cpu0'/>
<target type='virtio' name='trace-path-cpu0'/>
<address type='virtio-serial' controller='0' bus='0' port='1'/>
</channel>
...
Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
example, if a guest uses three CPUs, chardev names should be trace-path-cpu0,
trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
3) Boot the guest
The corresponding character devices appear under /dev/virtio-ports/ in the guest.
Run
===
0) Build trace agent in a guest
$ make
1) Enable ftrace in the guest
<Example>
# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
2) Run trace agent in the guest
The agent must be run as root.
# ./trace-agent
The read/write threads in the agent wait for a start order from the host. If you
add the -o option, trace data is written to stdout in the guest.
3) Open FIFO in a host
# cat /tmp/virtio-trace/trace-path-cpu0.out
If the host does not open these, trace data gets stuck in the virtio buffers and
the guest will stall, because the chardev in QEMU works in blocking mode. This
blocking behaviour may be removed in the future.
4) Start reading trace data by an order from the host
The host sends a read-start order to the guest via virtio-serial.
# echo 1 > /tmp/virtio-trace/agent-ctl-path.in
5) Stop reading trace data by an order from the host
The host sends a read-stop order to the guest via virtio-serial.
# echo 0 > /tmp/virtio-trace/agent-ctl-path.in
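
The same start and stop orders can also be sent from a host-side program instead
of echo. A minimal sketch, where send_order() is a hypothetical helper and the
control path is the FIFO created in the Setup section:

#include <fcntl.h>
#include <unistd.h>

/* Write a start ('1') or stop ('0') order followed by a newline, matching what
 * echo produces and what the agent's controller thread expects. */
static int send_order(const char *ctl_path, char order)
{
	char msg[2] = { order, '\n' };
	ssize_t n;
	int fd = open(ctl_path, O_WRONLY);

	if (fd < 0)
		return -1;
	n = write(fd, msg, sizeof(msg));
	close(fd);
	return n == (ssize_t)sizeof(msg) ? 0 : -1;
}

/* e.g. send_order("/tmp/virtio-trace/agent-ctl-path.in", '1'); */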

@@ -0,0 +1,137 @@
/*
* Controller of read/write threads for virtio-trace
*
* Copyright (C) 2012 Hitachi, Ltd.
* Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
* Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
*
* Licensed under GPL version 2 only.
*
*/
#define _GNU_SOURCE
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "trace-agent.h"
#define HOST_MSG_SIZE 256
#define EVENT_WAIT_MSEC 100
static volatile sig_atomic_t global_signal_val;
bool global_sig_receive; /* default false */
bool global_run_operation; /* default false*/
/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
static void signal_handler(int sig)
{
global_signal_val = sig;
}
int rw_ctl_init(const char *ctl_path)
{
int ctl_fd;
ctl_fd = open(ctl_path, O_RDONLY);
if (ctl_fd == -1) {
pr_err("Cannot open ctl_fd\n");
goto error;
}
return ctl_fd;
error:
exit(EXIT_FAILURE);
}
static int wait_order(int ctl_fd)
{
struct pollfd poll_fd;
int ret = 0;
while (!global_sig_receive) {
poll_fd.fd = ctl_fd;
poll_fd.events = POLLIN;
ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
if (global_signal_val) {
global_sig_receive = true;
pr_info("Receive interrupt %d\n", global_signal_val);
/* Wakes rw-threads when they are sleeping */
if (!global_run_operation)
pthread_cond_broadcast(&cond_wakeup);
ret = -1;
break;
}
if (ret < 0) {
pr_err("Polling error\n");
goto error;
}
if (ret)
break;
};
return ret;
error:
exit(EXIT_FAILURE);
}
/*
* control read/write threads by handling global_run_operation
*/
void *rw_ctl_loop(int ctl_fd)
{
ssize_t rlen;
char buf[HOST_MSG_SIZE];
int ret;
/* Setup signal handlers */
signal(SIGTERM, signal_handler);
signal(SIGINT, signal_handler);
signal(SIGQUIT, signal_handler);
while (!global_sig_receive) {
ret = wait_order(ctl_fd);
if (ret < 0)
break;
rlen = read(ctl_fd, buf, sizeof(buf));
if (rlen < 0) {
pr_err("read data error in ctl thread\n");
goto error;
}
if (rlen == 2 && buf[0] == '1') {
/*
* If host writes '1' to a control path,
* this controller wakes all read/write threads.
*/
global_run_operation = true;
pthread_cond_broadcast(&cond_wakeup);
pr_debug("Wake up all read/write threads\n");
} else if (rlen == 2 && buf[0] == '0') {
/*
* If host writes '0' to a control path, read/write
* threads will wait for notification from Host.
*/
global_run_operation = false;
pr_debug("Stop all read/write threads\n");
} else
pr_info("Invalid host notification: %s\n", buf);
}
return NULL;
error:
exit(EXIT_FAILURE);
}

@@ -0,0 +1,192 @@
/*
* Read/write thread of a guest agent for virtio-trace
*
* Copyright (C) 2012 Hitachi, Ltd.
* Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
* Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
*
* Licensed under GPL version 2 only.
*
*/
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include "trace-agent.h"
#define READ_WAIT_USEC 100000
void *rw_thread_info_new(void)
{
struct rw_thread_info *rw_ti;
rw_ti = zalloc(sizeof(struct rw_thread_info));
if (rw_ti == NULL) {
pr_err("rw_thread_info zalloc error\n");
exit(EXIT_FAILURE);
}
rw_ti->cpu_num = -1;
rw_ti->in_fd = -1;
rw_ti->out_fd = -1;
rw_ti->read_pipe = -1;
rw_ti->write_pipe = -1;
rw_ti->pipe_size = PIPE_INIT;
return rw_ti;
}
void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
bool stdout_flag, unsigned long pipe_size,
struct rw_thread_info *rw_ti)
{
int data_pipe[2];
rw_ti->cpu_num = cpu;
/* set read(input) fd */
rw_ti->in_fd = open(in_path, O_RDONLY);
if (rw_ti->in_fd == -1) {
pr_err("Could not open in_fd (CPU:%d)\n", cpu);
goto error;
}
/* set write(output) fd */
if (!stdout_flag) {
/* virtio-serial output mode */
rw_ti->out_fd = open(out_path, O_WRONLY);
if (rw_ti->out_fd == -1) {
pr_err("Could not open out_fd (CPU:%d)\n", cpu);
goto error;
}
} else
/* stdout mode */
rw_ti->out_fd = STDOUT_FILENO;
if (pipe2(data_pipe, O_NONBLOCK) < 0) {
pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
goto error;
}
/*
* The default pipe size is 64KB (see fs/pipe.c).
* The pipe size is enlarged here so trace data can be read/written quickly.
*/
if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
goto error;
}
rw_ti->read_pipe = data_pipe[1];
rw_ti->write_pipe = data_pipe[0];
rw_ti->pipe_size = pipe_size;
return NULL;
error:
exit(EXIT_FAILURE);
}
/* Bind a thread to a cpu */
static void bind_cpu(int cpu_num)
{
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(cpu_num, &mask);
/* bind my thread to cpu_num by assigning zero to the first argument */
if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
}
static void *rw_thread_main(void *thread_info)
{
ssize_t rlen, wlen;
ssize_t ret;
struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
bind_cpu(ts->cpu_num);
while (1) {
/* Wait for a read order of trace data by Host OS */
if (!global_run_operation) {
pthread_mutex_lock(&mutex_notify);
pthread_cond_wait(&cond_wakeup, &mutex_notify);
pthread_mutex_unlock(&mutex_notify);
}
if (global_sig_receive)
break;
/*
* Each thread reads the trace_pipe_raw of the cpu it is bound
* to, so there is no contention between threads.
*/
rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
if (rlen < 0) {
pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
goto error;
} else if (rlen == 0) {
/*
* If no trace data exists, or the data is not yet readable
* because it does not exceed the page size, splice_read
* returns 0. In that case, wait for the ring-buffer to be
* filled.
*/
usleep(READ_WAIT_USEC);
pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
continue;
}
wlen = 0;
do {
ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
rlen - wlen,
SPLICE_F_MOVE | SPLICE_F_MORE);
if (ret < 0) {
pr_err("Splice_write in rw-thread(%d)\n",
ts->cpu_num);
goto error;
} else if (ret == 0)
/*
* If the host reader cannot keep up with the
* trace data, the guest is stopped, because the
* character device in QEMU does not support
* non-blocking mode. The writer therefore
* sleeps in that case.
* This sleep can be removed once non-blocking
* mode is supported.
*/
sleep(1);
wlen += ret;
} while (wlen < rlen);
}
return NULL;
error:
exit(EXIT_FAILURE);
}
pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
{
int ret;
pthread_t rw_thread_per_cpu;
ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
if (ret != 0) {
pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
exit(EXIT_FAILURE);
}
return rw_thread_per_cpu;
}

@@ -0,0 +1,270 @@
/*
* Guest agent for virtio-trace
*
* Copyright (C) 2012 Hitachi, Ltd.
* Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
* Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
*
* Licensed under GPL version 2 only.
*
*/
#define _GNU_SOURCE
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "trace-agent.h"
#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
#define PIPE_DEF_BUFS 16
#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
#define PIPE_MAX_SIZE (1024*1024)
#define READ_PATH_FMT \
"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
static int get_total_cpus(void)
{
int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
if (nr_cpus <= 0) {
pr_err("Could not read cpus\n");
goto error;
} else if (nr_cpus > MAX_CPUS) {
pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
goto error;
}
return nr_cpus;
error:
exit(EXIT_FAILURE);
}
static void *agent_info_new(void)
{
struct agent_info *s;
int i;
s = zalloc(sizeof(struct agent_info));
if (s == NULL) {
pr_err("agent_info zalloc error\n");
exit(EXIT_FAILURE);
}
s->pipe_size = PIPE_INIT;
s->use_stdout = false;
s->cpus = get_total_cpus();
s->ctl_fd = -1;
/* read/write threads init */
for (i = 0; i < s->cpus; i++)
s->rw_ti[i] = rw_thread_info_new();
return s;
}
static unsigned long parse_size(const char *arg)
{
unsigned long value, round;
char *ptr;
value = strtoul(arg, &ptr, 10);
switch (*ptr) {
case 'K': case 'k':
value <<= 10;
break;
case 'M': case 'm':
value <<= 20;
break;
default:
break;
}
if (value > PIPE_MAX_SIZE) {
pr_err("Pipe size must be less than 1MB\n");
goto error;
} else if (value < PIPE_MIN_SIZE) {
pr_err("Pipe size must be over 64KB\n");
goto error;
}
/* Align buffer size with page unit */
round = value & (PAGE_SIZE - 1);
value = value - round;
return value;
error:
return 0;
}
static void usage(char const *prg)
{
pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
}
static const char *make_path(int cpu_num, bool this_is_write_path)
{
int ret;
char *buf;
buf = zalloc(PATH_MAX);
if (buf == NULL) {
pr_err("Could not allocate buffer\n");
goto error;
}
if (this_is_write_path)
/* write(output) path */
ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
else
/* read(input) path */
ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
if (ret <= 0) {
pr_err("Failed to generate %s path(CPU#%d):%d\n",
this_is_write_path ? "write" : "read", cpu_num, ret);
goto error;
}
return buf;
error:
free(buf);
return NULL;
}
static const char *make_input_path(int cpu_num)
{
return make_path(cpu_num, false);
}
static const char *make_output_path(int cpu_num)
{
return make_path(cpu_num, true);
}
static void *agent_info_init(struct agent_info *s)
{
int cpu;
const char *in_path = NULL;
const char *out_path = NULL;
/* init read/write threads */
for (cpu = 0; cpu < s->cpus; cpu++) {
/* set read(input) path per read/write thread */
in_path = make_input_path(cpu);
if (in_path == NULL)
goto error;
/* set write(output) path per read/write thread*/
if (!s->use_stdout) {
out_path = make_output_path(cpu);
if (out_path == NULL)
goto error;
} else
/* stdout mode */
pr_debug("stdout mode\n");
rw_thread_init(cpu, in_path, out_path, s->use_stdout,
s->pipe_size, s->rw_ti[cpu]);
}
/* init controller of read/write threads */
s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
return NULL;
error:
exit(EXIT_FAILURE);
}
static void *parse_args(int argc, char *argv[], struct agent_info *s)
{
int cmd;
unsigned long size;
while ((cmd = getopt(argc, argv, "hos:")) != -1) {
switch (cmd) {
/* stdout mode */
case 'o':
s->use_stdout = true;
break;
/* size of pipe */
case 's':
size = parse_size(optarg);
if (size == 0)
goto error;
s->pipe_size = size;
break;
case 'h':
default:
usage(argv[0]);
goto error;
}
}
agent_info_init(s);
return NULL;
error:
exit(EXIT_FAILURE);
}
static void agent_main_loop(struct agent_info *s)
{
int cpu;
pthread_t rw_thread_per_cpu[MAX_CPUS];
/* Start all read/write threads */
for (cpu = 0; cpu < s->cpus; cpu++)
rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
rw_ctl_loop(s->ctl_fd);
/* Finish all read/write threads */
for (cpu = 0; cpu < s->cpus; cpu++) {
int ret;
ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
if (ret != 0) {
pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
exit(EXIT_FAILURE);
}
}
}
static void agent_info_free(struct agent_info *s)
{
int i;
close(s->ctl_fd);
for (i = 0; i < s->cpus; i++) {
close(s->rw_ti[i]->in_fd);
close(s->rw_ti[i]->out_fd);
close(s->rw_ti[i]->read_pipe);
close(s->rw_ti[i]->write_pipe);
free(s->rw_ti[i]);
}
free(s);
}
int main(int argc, char *argv[])
{
struct agent_info *s = NULL;
s = agent_info_new();
parse_args(argc, argv, s);
agent_main_loop(s);
agent_info_free(s);
return 0;
}

@@ -0,0 +1,75 @@
#ifndef __TRACE_AGENT_H__
#define __TRACE_AGENT_H__
#include <pthread.h>
#include <stdbool.h>
#define MAX_CPUS 256
#define PIPE_INIT (1024*1024)
/*
* agent_info - structure managing total information of guest agent
* @pipe_size: size of pipe (default 1MB)
* @use_stdout: set to true when the -o option is given (default false)
* @cpus: total number of CPUs
* @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
* @rw_ti: structure managing information of read/write threads
*/
struct agent_info {
unsigned long pipe_size;
bool use_stdout;
int cpus;
int ctl_fd;
struct rw_thread_info *rw_ti[MAX_CPUS];
};
/*
* rw_thread_info - structure managing a read/write thread per cpu
* @cpu_num: cpu number operating this read/write thread
* @in_fd: fd of reading trace data path in cpu_num
* @out_fd: fd of writing trace data path in cpu_num
* @read_pipe: fd of read pipe
* @write_pipe: fd of write pipe
* @pipe_size: size of pipe (default 1MB)
*/
struct rw_thread_info {
int cpu_num;
int in_fd;
int out_fd;
int read_pipe;
int write_pipe;
unsigned long pipe_size;
};
/* use for stopping rw threads */
extern bool global_sig_receive;
/* use for notification */
extern bool global_run_operation;
extern pthread_mutex_t mutex_notify;
extern pthread_cond_t cond_wakeup;
/* for controller of read/write threads */
extern int rw_ctl_init(const char *ctl_path);
extern void *rw_ctl_loop(int ctl_fd);
/* for trace read/write thread */
extern void *rw_thread_info_new(void);
extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
bool stdout_flag, unsigned long pipe_size,
struct rw_thread_info *rw_ti);
extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
static inline void *zalloc(size_t size)
{
return calloc(1, size);
}
#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
#ifdef DEBUG
#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
#else
#define pr_debug(format, ...) do {} while (0)
#endif
#endif /*__TRACE_AGENT_H__*/

tools/virtio/virtio_test.c (new file, 290 lines)
@@ -0,0 +1,290 @@
#define _GNU_SOURCE
#include <getopt.h>
#include <string.h>
#include <poll.h>
#include <sys/eventfd.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <stdbool.h>
#include <linux/vhost.h>
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include "../../drivers/vhost/test.h"
/* Unused */
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
struct vq_info {
int kick;
int call;
int num;
int idx;
void *ring;
/* copy used for control */
struct vring vring;
struct virtqueue *vq;
};
struct vdev_info {
struct virtio_device vdev;
int control;
struct pollfd fds[1];
struct vq_info vqs[1];
int nvqs;
void *buf;
size_t buf_size;
struct vhost_memory *mem;
};
bool vq_notify(struct virtqueue *vq)
{
struct vq_info *info = vq->priv;
unsigned long long v = 1;
int r;
r = write(info->kick, &v, sizeof v);
assert(r == sizeof v);
return true;
}
void vq_callback(struct virtqueue *vq)
{
}
void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
{
struct vhost_vring_state state = { .index = info->idx };
struct vhost_vring_file file = { .index = info->idx };
unsigned long long features = dev->vdev.features[0];
struct vhost_vring_addr addr = {
.index = info->idx,
.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
};
int r;
r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
assert(r >= 0);
state.num = info->vring.num;
r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
assert(r >= 0);
state.num = 0;
r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
assert(r >= 0);
r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
assert(r >= 0);
file.fd = info->kick;
r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
assert(r >= 0);
file.fd = info->call;
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
assert(r >= 0);
}
static void vq_info_add(struct vdev_info *dev, int num)
{
struct vq_info *info = &dev->vqs[dev->nvqs];
int r;
info->idx = dev->nvqs;
info->kick = eventfd(0, EFD_NONBLOCK);
info->call = eventfd(0, EFD_NONBLOCK);
r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
assert(r >= 0);
memset(info->ring, 0, vring_size(num, 4096));
vring_init(&info->vring, num, info->ring, 4096);
info->vq = vring_new_virtqueue(info->idx,
info->vring.num, 4096, &dev->vdev,
true, info->ring,
vq_notify, vq_callback, "test");
assert(info->vq);
info->vq->priv = info;
vhost_vq_setup(dev, info);
dev->fds[info->idx].fd = info->call;
dev->fds[info->idx].events = POLLIN;
dev->nvqs++;
}
static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
{
int r;
memset(dev, 0, sizeof *dev);
dev->vdev.features[0] = features;
dev->vdev.features[1] = features >> 32;
dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size);
assert(dev->buf);
dev->control = open("/dev/vhost-test", O_RDWR);
assert(dev->control >= 0);
r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
assert(r >= 0);
dev->mem = malloc(offsetof(struct vhost_memory, regions) +
sizeof dev->mem->regions[0]);
assert(dev->mem);
memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
sizeof dev->mem->regions[0]);
dev->mem->nregions = 1;
dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
dev->mem->regions[0].userspace_addr = (long)dev->buf;
dev->mem->regions[0].memory_size = dev->buf_size;
r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
assert(r >= 0);
}
/* TODO: this is pretty bad: we get a cache line bounce
* for the wait queue on poll and another one on read,
* plus the read which is there just to clear the
* current state. */
static void wait_for_interrupt(struct vdev_info *dev)
{
int i;
unsigned long long val;
poll(dev->fds, dev->nvqs, -1);
for (i = 0; i < dev->nvqs; ++i)
if (dev->fds[i].revents & POLLIN) {
read(dev->fds[i].fd, &val, sizeof val);
}
}
static void run_test(struct vdev_info *dev, struct vq_info *vq,
bool delayed, int bufs)
{
struct scatterlist sl;
long started = 0, completed = 0;
long completed_before;
int r, test = 1;
unsigned len;
long long spurious = 0;
r = ioctl(dev->control, VHOST_TEST_RUN, &test);
assert(r >= 0);
for (;;) {
virtqueue_disable_cb(vq->vq);
completed_before = completed;
do {
if (started < bufs) {
sg_init_one(&sl, dev->buf, dev->buf_size);
r = virtqueue_add_outbuf(vq->vq, &sl, 1,
dev->buf + started,
GFP_ATOMIC);
if (likely(r == 0)) {
++started;
if (unlikely(!virtqueue_kick(vq->vq)))
r = -1;
}
} else
r = -1;
/* Flush out completed bufs if any */
if (virtqueue_get_buf(vq->vq, &len)) {
++completed;
r = 0;
}
} while (r == 0);
if (completed == completed_before)
++spurious;
assert(completed <= bufs);
assert(started <= bufs);
if (completed == bufs)
break;
if (delayed) {
if (virtqueue_enable_cb_delayed(vq->vq))
wait_for_interrupt(dev);
} else {
if (virtqueue_enable_cb(vq->vq))
wait_for_interrupt(dev);
}
}
test = 0;
r = ioctl(dev->control, VHOST_TEST_RUN, &test);
assert(r >= 0);
fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
}
const char optstring[] = "h";
const struct option longopts[] = {
{
.name = "help",
.val = 'h',
},
{
.name = "event-idx",
.val = 'E',
},
{
.name = "no-event-idx",
.val = 'e',
},
{
.name = "indirect",
.val = 'I',
},
{
.name = "no-indirect",
.val = 'i',
},
{
.name = "delayed-interrupt",
.val = 'D',
},
{
.name = "no-delayed-interrupt",
.val = 'd',
},
{
}
};
static void help(void)
{
fprintf(stderr, "Usage: virtio_test [--help]"
" [--no-indirect]"
" [--no-event-idx]"
" [--delayed-interrupt]"
"\n");
}
int main(int argc, char **argv)
{
struct vdev_info dev;
unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX);
int o;
bool delayed = false;
for (;;) {
o = getopt_long(argc, argv, optstring, longopts, NULL);
switch (o) {
case -1:
goto done;
case '?':
help();
exit(2);
case 'e':
features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
break;
case 'h':
help();
goto done;
case 'i':
features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
break;
case 'D':
delayed = true;
break;
default:
assert(0);
break;
}
}
done:
vdev_info_init(&dev, features);
vq_info_add(&dev, 256);
run_test(&dev, &dev.vqs[0], delayed, 0x100000);
return 0;
}

tools/virtio/vringh_test.c (new file, 746 lines)
@@ -0,0 +1,746 @@
/* Simple test of virtio code, entirely in userspace. */
#define _GNU_SOURCE
#include <sched.h>
#include <err.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/uaccess.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#define USER_MEM (1024*1024)
void *__user_addr_min, *__user_addr_max;
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
static u64 user_addr_offset;
#define RINGSIZE 256
#define ALIGN 4096
static bool never_notify_host(struct virtqueue *vq)
{
abort();
}
static void never_callback_guest(struct virtqueue *vq)
{
abort();
}
static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
return false;
if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
return false;
r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
r->offset = user_addr_offset;
return true;
}
/* We return single byte ranges. */
static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
return false;
if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
return false;
r->start = addr;
r->end_incl = r->start;
r->offset = user_addr_offset;
return true;
}
struct guest_virtio_device {
struct virtio_device vdev;
int to_host_fd;
unsigned long notifies;
};
static bool parallel_notify_host(struct virtqueue *vq)
{
int rc;
struct guest_virtio_device *gvdev;
gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
rc = write(gvdev->to_host_fd, "", 1);
if (rc < 0)
return false;
gvdev->notifies++;
return true;
}
static bool no_notify_host(struct virtqueue *vq)
{
return true;
}
#define NUM_XFERS (10000000)
/* We aim for two "distant" cpus. */
static void find_cpus(unsigned int *first, unsigned int *last)
{
unsigned int i;
*first = -1U;
*last = 0;
for (i = 0; i < 4096; i++) {
cpu_set_t set;
CPU_ZERO(&set);
CPU_SET(i, &set);
if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
if (i < *first)
*first = i;
if (i > *last)
*last = i;
}
}
}
/* Opencoded version for fast mode */
static inline int vringh_get_head(struct vringh *vrh, u16 *head)
{
u16 avail_idx, i;
int err;
err = get_user(avail_idx, &vrh->vring.avail->idx);
if (err)
return err;
if (vrh->last_avail_idx == avail_idx)
return 0;
/* Only get avail ring entries after they have been exposed by guest. */
virtio_rmb(vrh->weak_barriers);
i = vrh->last_avail_idx & (vrh->vring.num - 1);
err = get_user(*head, &vrh->vring.avail->ring[i]);
if (err)
return err;
vrh->last_avail_idx++;
return 1;
}
static int parallel_test(unsigned long features,
bool (*getrange)(struct vringh *vrh,
u64 addr, struct vringh_range *r),
bool fast_vringh)
{
void *host_map, *guest_map;
int fd, mapsize, to_guest[2], to_host[2];
unsigned long xfers = 0, notifies = 0, receives = 0;
unsigned int first_cpu, last_cpu;
cpu_set_t cpu_set;
char buf[128];
/* Create real file to mmap. */
fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
if (fd < 0)
err(1, "Opening /tmp/vringh_test-file");
/* Extra room at the end for some data, and indirects */
mapsize = vring_size(RINGSIZE, ALIGN)
+ RINGSIZE * 2 * sizeof(int)
+ RINGSIZE * 6 * sizeof(struct vring_desc);
mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
ftruncate(fd, mapsize);
/* Parent and child use separate addresses, to check our mapping logic! */
host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
pipe(to_guest);
pipe(to_host);
CPU_ZERO(&cpu_set);
find_cpus(&first_cpu, &last_cpu);
printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
fflush(stdout);
if (fork() != 0) {
struct vringh vrh;
int status, err, rlen = 0;
char rbuf[5];
/* We are the host: never access guest addresses! */
munmap(guest_map, mapsize);
__user_addr_min = host_map;
__user_addr_max = __user_addr_min + mapsize;
user_addr_offset = host_map - guest_map;
assert(user_addr_offset);
close(to_guest[0]);
close(to_host[1]);
vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
vringh_init_user(&vrh, features, RINGSIZE, true,
vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
CPU_SET(first_cpu, &cpu_set);
if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
errx(1, "Could not set affinity to cpu %u", first_cpu);
while (xfers < NUM_XFERS) {
struct iovec host_riov[2], host_wiov[2];
struct vringh_iov riov, wiov;
u16 head, written;
if (fast_vringh) {
for (;;) {
err = vringh_get_head(&vrh, &head);
if (err != 0)
break;
err = vringh_need_notify_user(&vrh);
if (err < 0)
errx(1, "vringh_need_notify_user: %i",
err);
if (err) {
write(to_guest[1], "", 1);
notifies++;
}
}
if (err != 1)
errx(1, "vringh_get_head");
written = 0;
goto complete;
} else {
vringh_iov_init(&riov,
host_riov,
ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov,
host_wiov,
ARRAY_SIZE(host_wiov));
err = vringh_getdesc_user(&vrh, &riov, &wiov,
getrange, &head);
}
if (err == 0) {
err = vringh_need_notify_user(&vrh);
if (err < 0)
errx(1, "vringh_need_notify_user: %i",
err);
if (err) {
write(to_guest[1], "", 1);
notifies++;
}
if (!vringh_notify_enable_user(&vrh))
continue;
/* Swallow all notifies at once. */
if (read(to_host[0], buf, sizeof(buf)) < 1)
break;
vringh_notify_disable_user(&vrh);
receives++;
continue;
}
if (err != 1)
errx(1, "vringh_getdesc_user: %i", err);
/* We simply copy bytes. */
if (riov.used) {
rlen = vringh_iov_pull_user(&riov, rbuf,
sizeof(rbuf));
if (rlen != 4)
errx(1, "vringh_iov_pull_user: %i",
rlen);
assert(riov.i == riov.used);
written = 0;
} else {
err = vringh_iov_push_user(&wiov, rbuf, rlen);
if (err != rlen)
errx(1, "vringh_iov_push_user: %i",
err);
assert(wiov.i == wiov.used);
written = err;
}
complete:
xfers++;
err = vringh_complete_user(&vrh, head, written);
if (err != 0)
errx(1, "vringh_complete_user: %i", err);
}
err = vringh_need_notify_user(&vrh);
if (err < 0)
errx(1, "vringh_need_notify_user: %i", err);
if (err) {
write(to_guest[1], "", 1);
notifies++;
}
wait(&status);
if (!WIFEXITED(status))
errx(1, "Child died with signal %i?", WTERMSIG(status));
if (WEXITSTATUS(status) != 0)
errx(1, "Child exited %i?", WEXITSTATUS(status));
printf("Host: notified %lu, pinged %lu\n", notifies, receives);
return 0;
} else {
struct guest_virtio_device gvdev;
struct virtqueue *vq;
unsigned int *data;
struct vring_desc *indirects;
unsigned int finished = 0;
/* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
data = guest_map + vring_size(RINGSIZE, ALIGN);
indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
/* We are the guest. */
munmap(host_map, mapsize);
close(to_guest[1]);
close(to_host[0]);
gvdev.vdev.features[0] = features;
gvdev.to_host_fd = to_host[1];
gvdev.notifies = 0;
CPU_SET(first_cpu, &cpu_set);
if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
err(1, "Could not set affinity to cpu %u", first_cpu);
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
guest_map, fast_vringh ? no_notify_host
: parallel_notify_host,
never_callback_guest, "guest vq");
/* Don't kfree indirects. */
__kfree_ignore_start = indirects;
__kfree_ignore_end = indirects + RINGSIZE * 6;
while (xfers < NUM_XFERS) {
struct scatterlist sg[4];
unsigned int num_sg, len;
int *dbuf, err;
bool output = !(xfers % 2);
/* Consume bufs. */
while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
if (len == 4)
assert(*dbuf == finished - 1);
else if (!fast_vringh)
assert(*dbuf == finished);
finished++;
}
/* Produce a buffer. */
dbuf = data + (xfers % (RINGSIZE + 1));
if (output)
*dbuf = xfers;
else
*dbuf = -1;
switch ((xfers / sizeof(*dbuf)) % 4) {
case 0:
/* Nasty three-element sg list. */
sg_init_table(sg, num_sg = 3);
sg_set_buf(&sg[0], (void *)dbuf, 1);
sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
break;
case 1:
sg_init_table(sg, num_sg = 2);
sg_set_buf(&sg[0], (void *)dbuf, 1);
sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
break;
case 2:
sg_init_table(sg, num_sg = 1);
sg_set_buf(&sg[0], (void *)dbuf, 4);
break;
case 3:
sg_init_table(sg, num_sg = 4);
sg_set_buf(&sg[0], (void *)dbuf, 1);
sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
break;
}
/* May allocate an indirect, so force it to allocate
* user addr */
__kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
if (output)
err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
GFP_KERNEL);
else
err = virtqueue_add_inbuf(vq, sg, num_sg,
dbuf, GFP_KERNEL);
if (err == -ENOSPC) {
if (!virtqueue_enable_cb_delayed(vq))
continue;
/* Swallow all notifies at once. */
if (read(to_guest[0], buf, sizeof(buf)) < 1)
break;
receives++;
virtqueue_disable_cb(vq);
continue;
}
if (err)
errx(1, "virtqueue_add_in/outbuf: %i", err);
xfers++;
virtqueue_kick(vq);
}
/* Any extra? */
while (finished != xfers) {
int *dbuf;
unsigned int len;
/* Consume bufs. */
dbuf = virtqueue_get_buf(vq, &len);
if (dbuf) {
if (len == 4)
assert(*dbuf == finished - 1);
else
assert(len == 0);
finished++;
continue;
}
if (!virtqueue_enable_cb_delayed(vq))
continue;
if (read(to_guest[0], buf, sizeof(buf)) < 1)
break;
receives++;
virtqueue_disable_cb(vq);
}
printf("Guest: notified %lu, pinged %lu\n",
gvdev.notifies, receives);
vring_del_virtqueue(vq);
return 0;
}
}
int main(int argc, char *argv[])
{
struct virtio_device vdev;
struct virtqueue *vq;
struct vringh vrh;
struct scatterlist guest_sg[RINGSIZE], *sgs[2];
struct iovec host_riov[2], host_wiov[2];
struct vringh_iov riov, wiov;
struct vring_used_elem used[RINGSIZE];
char buf[28];
u16 head;
int err;
unsigned i;
void *ret;
bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
bool fast_vringh = false, parallel = false;
getrange = getrange_iov;
vdev.features[0] = 0;
while (argv[1]) {
if (strcmp(argv[1], "--indirect") == 0)
vdev.features[0] |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
else if (strcmp(argv[1], "--eventidx") == 0)
vdev.features[0] |= (1 << VIRTIO_RING_F_EVENT_IDX);
else if (strcmp(argv[1], "--slow-range") == 0)
getrange = getrange_slow;
else if (strcmp(argv[1], "--fast-vringh") == 0)
fast_vringh = true;
else if (strcmp(argv[1], "--parallel") == 0)
parallel = true;
else
errx(1, "Unknown arg %s", argv[1]);
argv++;
}
if (parallel)
return parallel_test(vdev.features[0], getrange, fast_vringh);
if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
abort();
__user_addr_max = __user_addr_min + USER_MEM;
memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
/* Set up guest side. */
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
__user_addr_min,
never_notify_host, never_callback_guest,
"guest vq");
/* Set up host side. */
vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
vringh_init_user(&vrh, vdev.features[0], RINGSIZE, true,
vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
/* No descriptor to get yet... */
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
if (err != 0)
errx(1, "vringh_getdesc_user: %i", err);
/* Guest puts in a descriptor. */
memcpy(__user_addr_max - 1, "a", 1);
sg_init_table(guest_sg, 1);
sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
sg_init_table(guest_sg+1, 1);
sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
sgs[0] = &guest_sg[0];
sgs[1] = &guest_sg[1];
/* May allocate an indirect, so force it to allocate user addr */
__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
if (err)
errx(1, "virtqueue_add_sgs: %i", err);
__kmalloc_fake = NULL;
/* Host retrieves it. */
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
if (err != 1)
errx(1, "vringh_getdesc_user: %i", err);
assert(riov.used == 1);
assert(riov.iov[0].iov_base == __user_addr_max - 1);
assert(riov.iov[0].iov_len == 1);
if (getrange != getrange_slow) {
assert(wiov.used == 1);
assert(wiov.iov[0].iov_base == __user_addr_max - 3);
assert(wiov.iov[0].iov_len == 2);
} else {
assert(wiov.used == 2);
assert(wiov.iov[0].iov_base == __user_addr_max - 3);
assert(wiov.iov[0].iov_len == 1);
assert(wiov.iov[1].iov_base == __user_addr_max - 2);
assert(wiov.iov[1].iov_len == 1);
}
err = vringh_iov_pull_user(&riov, buf, 5);
if (err != 1)
errx(1, "vringh_iov_pull_user: %i", err);
assert(buf[0] == 'a');
assert(riov.i == 1);
assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
memcpy(buf, "bcdef", 5);
err = vringh_iov_push_user(&wiov, buf, 5);
if (err != 2)
errx(1, "vringh_iov_push_user: %i", err);
assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
assert(wiov.i == wiov.used);
assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
/* Host is done. */
err = vringh_complete_user(&vrh, head, err);
if (err != 0)
errx(1, "vringh_complete_user: %i", err);
/* Guest should see used token now. */
__kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
__kfree_ignore_end = __kfree_ignore_start + 1;
ret = virtqueue_get_buf(vq, &i);
if (ret != &err)
errx(1, "virtqueue_get_buf: %p", ret);
assert(i == 2);
/* Guest puts in a huge descriptor. */
sg_init_table(guest_sg, RINGSIZE);
for (i = 0; i < RINGSIZE; i++) {
sg_set_buf(&guest_sg[i],
__user_addr_max - USER_MEM/4, USER_MEM/4);
}
/* Fill contents with recognisable garbage. */
for (i = 0; i < USER_MEM/4; i++)
((char *)__user_addr_max - USER_MEM/4)[i] = i;
/* This will allocate an indirect, so force it to allocate user addr */
__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
if (err)
errx(1, "virtqueue_add_outbuf (large): %i", err);
__kmalloc_fake = NULL;
/* Host picks it up (allocates new iov). */
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
if (err != 1)
errx(1, "vringh_getdesc_user: %i", err);
assert(riov.max_num & VRINGH_IOV_ALLOCATED);
assert(riov.iov != host_riov);
if (getrange != getrange_slow)
assert(riov.used == RINGSIZE);
else
assert(riov.used == RINGSIZE * USER_MEM/4);
assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
assert(wiov.used == 0);
/* Pull data back out (in odd chunks), should be as expected. */
for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
err = vringh_iov_pull_user(&riov, buf, 3);
if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
errx(1, "vringh_iov_pull_user large: %i", err);
assert(buf[0] == (char)i);
assert(err < 2 || buf[1] == (char)(i + 1));
assert(err < 3 || buf[2] == (char)(i + 2));
}
assert(riov.i == riov.used);
vringh_iov_cleanup(&riov);
vringh_iov_cleanup(&wiov);
/* Complete using multi interface, just because we can. */
used[0].id = head;
used[0].len = 0;
err = vringh_complete_multi_user(&vrh, used, 1);
if (err)
errx(1, "vringh_complete_multi_user(1): %i", err);
/* Free up those descriptors. */
ret = virtqueue_get_buf(vq, &i);
if (ret != &err)
errx(1, "virtqueue_get_buf: %p", ret);
/* Add lots of descriptors. */
sg_init_table(guest_sg, 1);
sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
for (i = 0; i < RINGSIZE; i++) {
err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
if (err)
errx(1, "virtqueue_add_outbuf (multiple): %i", err);
}
/* Now get many, and consume them all at once. */
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
for (i = 0; i < RINGSIZE; i++) {
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
if (err != 1)
errx(1, "vringh_getdesc_user: %i", err);
used[i].id = head;
used[i].len = 0;
}
/* Make sure it wraps around ring, to test! */
assert(vrh.vring.used->idx % RINGSIZE != 0);
err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
if (err)
errx(1, "vringh_complete_multi_user: %i", err);
/* Free those buffers. */
for (i = 0; i < RINGSIZE; i++) {
unsigned len;
assert(virtqueue_get_buf(vq, &len) != NULL);
}
/* Test weird (but legal!) indirect. */
if (vdev.features[0] & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
char *data = __user_addr_max - USER_MEM/4;
struct vring_desc *d = __user_addr_max - USER_MEM/2;
struct vring vring;
/* Force creation of direct, which we modify. */
vdev.features[0] &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
__user_addr_min,
never_notify_host,
never_callback_guest,
"guest vq");
sg_init_table(guest_sg, 4);
sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
sg_set_buf(&guest_sg[2], data + 6, 4);
sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
if (err)
errx(1, "virtqueue_add_outbuf (indirect): %i", err);
vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
/* They're used in order, but double-check... */
assert(vring.desc[0].addr == (unsigned long)d);
assert(vring.desc[1].addr == (unsigned long)(d+2));
assert(vring.desc[2].addr == (unsigned long)data + 6);
assert(vring.desc[3].addr == (unsigned long)(d+3));
vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
/* First indirect */
d[0].addr = (unsigned long)data;
d[0].len = 1;
d[0].flags = VRING_DESC_F_NEXT;
d[0].next = 1;
d[1].addr = (unsigned long)data + 1;
d[1].len = 2;
d[1].flags = 0;
/* Second indirect */
d[2].addr = (unsigned long)data + 3;
d[2].len = 3;
d[2].flags = 0;
/* Third indirect */
d[3].addr = (unsigned long)data + 10;
d[3].len = 5;
d[3].flags = VRING_DESC_F_NEXT;
d[3].next = 1;
d[4].addr = (unsigned long)data + 15;
d[4].len = 6;
d[4].flags = VRING_DESC_F_NEXT;
d[4].next = 2;
d[5].addr = (unsigned long)data + 21;
d[5].len = 7;
d[5].flags = 0;
/* Host picks it up (allocates new iov). */
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
if (err != 1)
errx(1, "vringh_getdesc_user: %i", err);
if (head != 0)
errx(1, "vringh_getdesc_user: head %i not 0", head);
assert(riov.max_num & VRINGH_IOV_ALLOCATED);
if (getrange != getrange_slow)
assert(riov.used == 7);
else
assert(riov.used == 28);
err = vringh_iov_pull_user(&riov, buf, 29);
assert(err == 28);
/* Data should be linear. */
for (i = 0; i < err; i++)
assert(buf[i] == i);
vringh_iov_cleanup(&riov);
}
/* Don't leak memory... */
vring_del_virtqueue(vq);
free(__user_addr_min);
return 0;
}