Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

197
fs/nfs/Kconfig Normal file
View file

@ -0,0 +1,197 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING
select LOCKD
select SUNRPC
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
computers using Sun's Network File System protocol. To compile
this file system support as a module, choose M here: the module
will be called nfs.
To mount file systems exported by NFS servers, you also need to
install the user space mount.nfs command which can be found in
the Linux nfs-utils package, available from http://linux-nfs.org/.
Information about using the mount command is available in the
mount(8) man page. More detail about the Linux NFS client
implementation is available via the nfs(5) man page.
Below you can choose which versions of the NFS protocol are
available in the kernel to mount NFS servers. Support for NFS
version 2 (RFC 1094) is always available when NFS_FS is selected.
To configure a system which mounts its root file system via NFS
at boot time, say Y here, select "Kernel level IP
autoconfiguration" in the NETWORK menu, and select "Root file
system on NFS" below. You cannot compile this file system as a
module in this case.
If unsure, say N.
config NFS_V2
tristate "NFS client support for NFS version 2"
depends on NFS_FS
default y
help
This option enables support for version 2 of the NFS protocol
(RFC 1094) in the kernel's NFS client.
If unsure, say Y.
config NFS_V3
tristate "NFS client support for NFS version 3"
depends on NFS_FS
default y
help
This option enables support for version 3 of the NFS protocol
(RFC 1813) in the kernel's NFS client.
If unsure, say Y.
config NFS_V3_ACL
bool "NFS client support for the NFSv3 ACL protocol extension"
depends on NFS_V3
help
Some NFS servers support an auxiliary NFSv3 ACL protocol that
Sun added to Solaris but never became an official part of the
NFS version 3 protocol. This protocol extension allows
applications on NFS clients to manipulate POSIX Access Control
Lists on files residing on NFS servers. NFS servers enforce
ACLs on local files whether this protocol is available or not.
Choose Y here if your NFS server supports the Solaris NFSv3 ACL
protocol extension and you want your NFS client to allow
applications to access and modify ACLs on files on the server.
Most NFS servers don't support the Solaris NFSv3 ACL protocol
extension. You can choose N here or specify the "noacl" mount
option to prevent your NFS client from trying to use the NFSv3
ACL protocol.
If unsure, say N.
config NFS_V4
tristate "NFS client support for NFS version 4"
depends on NFS_FS
select SUNRPC_GSS
select KEYS
help
This option enables support for version 4 of the NFS protocol
(RFC 3530) in the kernel's NFS client.
To mount NFS servers using NFSv4, you also need to install user
space programs which can be found in the Linux nfs-utils package,
available from http://linux-nfs.org/.
If unsure, say Y.
config NFS_SWAP
bool "Provide swap over NFS support"
default n
depends on NFS_FS
select SUNRPC_SWAP
help
This option enables swapon to work on files located on NFS mounts.
config NFS_V4_1
bool "NFS client support for NFSv4.1"
depends on NFS_V4
select SUNRPC_BACKCHANNEL
help
This option enables support for minor version 1 of the NFSv4 protocol
(RFC 5661) in the kernel's NFS client.
If unsure, say N.
config NFS_V4_2
bool "NFS client support for NFSv4.2"
depends on NFS_V4_1
help
This option enables support for minor version 2 of the NFSv4 protocol
in the kernel's NFS client.
If unsure, say N.
config PNFS_FILE_LAYOUT
tristate
depends on NFS_V4_1
default NFS_V4
config PNFS_BLOCK
tristate
depends on NFS_V4_1 && BLK_DEV_DM
default NFS_V4
config PNFS_OBJLAYOUT
tristate
depends on NFS_V4_1 && SCSI_OSD_ULD
default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
depends on NFS_V4_1
default "kernel.org"
help
This option defines the domain portion of the implementation ID that
may be sent in the NFS exchange_id operation. The value must be in
the format of a DNS domain name and should be set to the DNS domain
name of the distribution.
If the NFS client is unchanged from the upstream kernel, this
option should be set to the default "kernel.org".
config NFS_V4_1_MIGRATION
bool "NFSv4.1 client support for migration"
depends on NFS_V4_1
default n
help
This option makes the NFS client advertise to NFSv4.1 servers that
it can support NFSv4 migration.
The NFSv4.1 pieces of the Linux NFSv4 migration implementation are
still experimental. If you are not an NFSv4 developer, say N here.
config NFS_V4_SECURITY_LABEL
bool
depends on NFS_V4_2 && SECURITY
default y
config ROOT_NFS
bool "Root file system on NFS"
depends on NFS_FS=y && IP_PNP
help
If you want your system to mount its root file system via NFS,
choose Y here. This is common practice for managing systems
without local permanent storage. For details, read
<file:Documentation/filesystems/nfs/nfsroot.txt>.
Most people say N here.
config NFS_FSCACHE
bool "Provide NFS client caching support"
depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
help
Say Y here if you want NFS data to be cached locally on disc through
the general filesystem cache manager
config NFS_USE_LEGACY_DNS
bool "Use the legacy NFS DNS resolver"
depends on NFS_V4
help
The kernel now provides a method for translating a host name into an
IP address. Select Y here if you would rather use your own DNS
resolver script.
If unsure, say N
config NFS_USE_KERNEL_DNS
bool
depends on NFS_V4 && !NFS_USE_LEGACY_DNS
select DNS_RESOLVER
default y
config NFS_DEBUG
bool
depends on NFS_FS && SUNRPC_DEBUG
select CRC32
default y

35
fs/nfs/Makefile Normal file
View file

@ -0,0 +1,35 @@
#
# Makefile for the Linux nfs filesystem routines.
#
obj-$(CONFIG_NFS_FS) += nfs.o
CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o nfstrace.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
obj-$(CONFIG_NFS_V3) += nfsv3.o
nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
obj-$(CONFIG_NFS_V4) += nfsv4.o
CFLAGS_nfs4trace.o += -I$(src)
nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
dns_resolve.o nfs4trace.o
nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/

View file

@ -0,0 +1,6 @@
#
# Makefile for the pNFS block layout driver kernel module
#
obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o

View file

@ -0,0 +1,925 @@
/*
* linux/fs/nfs/blocklayout/blocklayout.c
*
* Module for the NFSv4.1 pNFS block layout driver.
*
* Copyright (c) 2006 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@citi.umich.edu>
* Fred Isaman <iisaman@umich.edu>
*
* permission is granted to use, copy, create derivative works and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the university of michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. if
* the above copyright notice or any other identification of the
* university of michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* this software is provided as is, without representation from the
* university of michigan as to its fitness for any purpose, and without
* warranty by the university of michigan of any kind, either express
* or implied, including without limitation the implied warranties of
* merchantability and fitness for a particular purpose. the regents
* of the university of michigan shall not be liable for any damages,
* including special, indirect, incidental, or consequential damages,
* with respect to any claim arising out or in connection with the use
* of the software, even if it has been or is hereafter advised of the
* possibility of such damages.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/bio.h> /* struct bio */
#include <linux/prefetch.h>
#include <linux/pagevec.h>
#include "../pnfs.h"
#include "../nfs4session.h"
#include "../internal.h"
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
static bool is_hole(struct pnfs_block_extent *be)
{
switch (be->be_state) {
case PNFS_BLOCK_NONE_DATA:
return true;
case PNFS_BLOCK_INVALID_DATA:
return be->be_tag ? false : true;
default:
return false;
}
}
/* The data we are handed might be spread across several bios. We need
* to track when the last one is finished.
*/
struct parallel_io {
struct kref refcnt;
void (*pnfs_callback) (void *data);
void *data;
};
static inline struct parallel_io *alloc_parallel(void *data)
{
struct parallel_io *rv;
rv = kmalloc(sizeof(*rv), GFP_NOFS);
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
}
return rv;
}
static inline void get_parallel(struct parallel_io *p)
{
kref_get(&p->refcnt);
}
static void destroy_parallel(struct kref *kref)
{
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
dprintk("%s enter\n", __func__);
p->pnfs_callback(p->data);
kfree(p);
}
static inline void put_parallel(struct parallel_io *p)
{
kref_put(&p->refcnt, destroy_parallel);
}
static struct bio *
bl_submit_bio(int rw, struct bio *bio)
{
if (bio) {
get_parallel(bio->bi_private);
dprintk("%s submitting %s bio %u@%llu\n", __func__,
rw == READ ? "read" : "write", bio->bi_iter.bi_size,
(unsigned long long)bio->bi_iter.bi_sector);
submit_bio(rw, bio);
}
return NULL;
}
static struct bio *
bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
void (*end_io)(struct bio *, int err), struct parallel_io *par)
{
struct bio *bio;
npg = min(npg, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, npg);
if (!bio && (current->flags & PF_MEMALLOC)) {
while (!bio && (npg /= 2))
bio = bio_alloc(GFP_NOIO, npg);
}
if (bio) {
bio->bi_iter.bi_sector = disk_sector;
bio->bi_bdev = bdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
}
return bio;
}
static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
struct page *page, struct pnfs_block_dev_map *map,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
struct parallel_io *par, unsigned int offset, int *len)
{
struct pnfs_block_dev *dev =
container_of(be->be_device, struct pnfs_block_dev, node);
u64 disk_addr, end;
dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
npg, rw, (unsigned long long)isect, offset, *len);
/* translate to device offset */
isect += be->be_v_offset;
isect -= be->be_f_offset;
/* translate to physical disk offset */
disk_addr = (u64)isect << SECTOR_SHIFT;
if (disk_addr < map->start || disk_addr >= map->start + map->len) {
if (!dev->map(dev, disk_addr, map))
return ERR_PTR(-EIO);
bio = bl_submit_bio(rw, bio);
}
disk_addr += map->disk_offset;
disk_addr -= map->start;
/* limit length to what the device mapping allows */
end = disk_addr + *len;
if (end >= map->start + map->len)
*len = map->start + map->len - disk_addr;
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, map->bdev,
disk_addr >> SECTOR_SHIFT, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
if (bio_add_page(bio, page, *len, offset) < *len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}
static void bl_end_io_read(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
if (err) {
struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
}
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_read_done(hdr);
}
static void
bl_end_par_io_read(void *data)
{
struct nfs_pgio_header *hdr = data;
hdr->task.tk_status = hdr->pnfs_error;
INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
schedule_work(&hdr->task.u.tk_work);
}
static enum pnfs_try_status
bl_read_pagelist(struct nfs_pgio_header *header)
{
struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
struct bio *bio = NULL;
struct pnfs_block_extent be;
sector_t isect, extent_length = 0;
struct parallel_io *par;
loff_t f_offset = header->args.offset;
size_t bytes_left = header->args.count;
unsigned int pg_offset, pg_len;
struct page **pages = header->args.pages;
int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
struct blk_plug plug;
int i;
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
header->page_array.npages, f_offset,
(unsigned int)header->args.count);
par = alloc_parallel(header);
if (!par)
return PNFS_NOT_ATTEMPTED;
par->pnfs_callback = bl_end_par_io_read;
blk_start_plug(&plug);
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
for (i = pg_index; i < header->page_array.npages; i++) {
if (extent_length <= 0) {
/* We've used up the previous extent */
bio = bl_submit_bio(READ, bio);
/* Get the next one */
if (!ext_tree_lookup(bl, isect, &be, false)) {
header->pnfs_error = -EIO;
goto out;
}
extent_length = be.be_length - (isect - be.be_f_offset);
}
pg_offset = f_offset & ~PAGE_CACHE_MASK;
if (is_dio) {
if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
pg_len = PAGE_CACHE_SIZE - pg_offset;
else
pg_len = bytes_left;
} else {
BUG_ON(pg_offset != 0);
pg_len = PAGE_CACHE_SIZE;
}
isect += (pg_offset >> SECTOR_SHIFT);
extent_length -= (pg_offset >> SECTOR_SHIFT);
if (is_hole(&be)) {
bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__);
zero_user_segment(pages[i], pg_offset, pg_len);
/* invalidate map */
map.start = NFS4_MAX_UINT64;
} else {
bio = do_add_page_to_bio(bio,
header->page_array.npages - i,
READ,
isect, pages[i], &map, &be,
bl_end_io_read, par,
pg_offset, &pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
}
isect += (pg_len >> SECTOR_SHIFT);
extent_length -= (pg_len >> SECTOR_SHIFT);
f_offset += pg_len;
bytes_left -= pg_len;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
header->res.eof = 1;
header->res.count = header->inode->i_size - header->args.offset;
} else {
header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
}
out:
bl_submit_bio(READ, bio);
blk_finish_plug(&plug);
put_parallel(par);
return PNFS_ATTEMPTED;
}
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct nfs_pgio_header *header = par->data;
if (!uptodate) {
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
}
/* Function scheduled for call during bl_end_par_io_write,
* it marks sectors as written and extends the commitlist.
*/
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
struct nfs_pgio_header *hdr =
container_of(task, struct nfs_pgio_header, task);
dprintk("%s enter\n", __func__);
if (likely(!hdr->pnfs_error)) {
struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK;
u64 end = (hdr->args.offset + hdr->args.count +
PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
(end - start) >> SECTOR_SHIFT);
}
pnfs_ld_write_done(hdr);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data)
{
struct nfs_pgio_header *hdr = data;
hdr->task.tk_status = hdr->pnfs_error;
hdr->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
schedule_work(&hdr->task.u.tk_work);
}
static enum pnfs_try_status
bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{
struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
struct bio *bio = NULL;
struct pnfs_block_extent be;
sector_t isect, extent_length = 0;
struct parallel_io *par = NULL;
loff_t offset = header->args.offset;
size_t count = header->args.count;
struct page **pages = header->args.pages;
int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
unsigned int pg_len;
struct blk_plug plug;
int i;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
/* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
*/
par = alloc_parallel(header);
if (!par)
return PNFS_NOT_ATTEMPTED;
par->pnfs_callback = bl_end_par_io_write;
blk_start_plug(&plug);
/* we always write out the whole page */
offset = offset & (loff_t)PAGE_CACHE_MASK;
isect = offset >> SECTOR_SHIFT;
for (i = pg_index; i < header->page_array.npages; i++) {
if (extent_length <= 0) {
/* We've used up the previous extent */
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
if (!ext_tree_lookup(bl, isect, &be, true)) {
header->pnfs_error = -EINVAL;
goto out;
}
extent_length = be.be_length - (isect - be.be_f_offset);
}
pg_len = PAGE_CACHE_SIZE;
bio = do_add_page_to_bio(bio, header->page_array.npages - i,
WRITE, isect, pages[i], &map, &be,
bl_end_io_write, par,
0, &pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
offset += pg_len;
count -= pg_len;
isect += (pg_len >> SECTOR_SHIFT);
extent_length -= (pg_len >> SECTOR_SHIFT);
}
header->res.count = header->args.count;
out:
bl_submit_bio(WRITE, bio);
blk_finish_plug(&plug);
put_parallel(par);
return PNFS_ATTEMPTED;
}
static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
int err;
dprintk("%s enter\n", __func__);
err = ext_tree_remove(bl, true, 0, LLONG_MAX);
WARN_ON(err);
kfree(bl);
}
static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
gfp_t gfp_flags)
{
struct pnfs_block_layout *bl;
dprintk("%s enter\n", __func__);
bl = kzalloc(sizeof(*bl), gfp_flags);
if (!bl)
return NULL;
bl->bl_ext_rw = RB_ROOT;
bl->bl_ext_ro = RB_ROOT;
spin_lock_init(&bl->bl_ext_lock);
return &bl->bl_layout;
}
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
dprintk("%s enter\n", __func__);
kfree(lseg);
}
/* Tracks info needed to ensure extents in layout obey constraints of spec */
struct layout_verification {
u32 mode; /* R or RW */
u64 start; /* Expected start of next non-COW extent */
u64 inval; /* Start of INVAL coverage */
u64 cowread; /* End of COW read coverage */
};
/* Verify the extent meets the layout requirements of the pnfs-block draft,
* section 2.3.1.
*/
static int verify_extent(struct pnfs_block_extent *be,
struct layout_verification *lv)
{
if (lv->mode == IOMODE_READ) {
if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
be->be_state == PNFS_BLOCK_INVALID_DATA)
return -EIO;
if (be->be_f_offset != lv->start)
return -EIO;
lv->start += be->be_length;
return 0;
}
/* lv->mode == IOMODE_RW */
if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
if (be->be_f_offset != lv->start)
return -EIO;
if (lv->cowread > lv->start)
return -EIO;
lv->start += be->be_length;
lv->inval = lv->start;
return 0;
} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
if (be->be_f_offset != lv->start)
return -EIO;
lv->start += be->be_length;
return 0;
} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
if (be->be_f_offset > lv->start)
return -EIO;
if (be->be_f_offset < lv->inval)
return -EIO;
if (be->be_f_offset < lv->cowread)
return -EIO;
/* It looks like you might want to min this with lv->start,
* but you really don't.
*/
lv->inval = lv->inval + be->be_length;
lv->cowread = be->be_f_offset + be->be_length;
return 0;
} else
return -EIO;
}
static int decode_sector_number(__be32 **rp, sector_t *sp)
{
uint64_t s;
*rp = xdr_decode_hyper(*rp, &s);
if (s & 0x1ff) {
printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
return -1;
}
*sp = s >> SECTOR_SHIFT;
return 0;
}
static int
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
struct layout_verification *lv, struct list_head *extents,
gfp_t gfp_mask)
{
struct pnfs_block_extent *be;
struct nfs4_deviceid id;
int error;
__be32 *p;
p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE);
if (!p)
return -EIO;
be = kzalloc(sizeof(*be), GFP_NOFS);
if (!be)
return -ENOMEM;
memcpy(&id, p, NFS4_DEVICEID4_SIZE);
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
error = -EIO;
be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
lo->plh_lc_cred, gfp_mask);
if (!be->be_device)
goto out_free_be;
/*
* The next three values are read in as bytes, but stored in the
* extent structure in 512-byte granularity.
*/
if (decode_sector_number(&p, &be->be_f_offset) < 0)
goto out_put_deviceid;
if (decode_sector_number(&p, &be->be_length) < 0)
goto out_put_deviceid;
if (decode_sector_number(&p, &be->be_v_offset) < 0)
goto out_put_deviceid;
be->be_state = be32_to_cpup(p++);
error = verify_extent(be, lv);
if (error) {
dprintk("%s: extent verification failed\n", __func__);
goto out_put_deviceid;
}
list_add_tail(&be->be_list, extents);
return 0;
out_put_deviceid:
nfs4_put_deviceid_node(be->be_device);
out_free_be:
kfree(be);
return error;
}
static struct pnfs_layout_segment *
bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
gfp_t gfp_mask)
{
struct layout_verification lv = {
.mode = lgr->range.iomode,
.start = lgr->range.offset >> SECTOR_SHIFT,
.inval = lgr->range.offset >> SECTOR_SHIFT,
.cowread = lgr->range.offset >> SECTOR_SHIFT,
};
struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
struct pnfs_layout_segment *lseg;
struct xdr_buf buf;
struct xdr_stream xdr;
struct page *scratch;
int status, i;
uint32_t count;
__be32 *p;
LIST_HEAD(extents);
dprintk("---> %s\n", __func__);
lseg = kzalloc(sizeof(*lseg), gfp_mask);
if (!lseg)
return ERR_PTR(-ENOMEM);
status = -ENOMEM;
scratch = alloc_page(gfp_mask);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf,
lgr->layoutp->pages, lgr->layoutp->len);
xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
status = -EIO;
p = xdr_inline_decode(&xdr, 4);
if (unlikely(!p))
goto out_free_scratch;
count = be32_to_cpup(p++);
dprintk("%s: number of extents %d\n", __func__, count);
/*
* Decode individual extents, putting them in temporary staging area
* until whole layout is decoded to make error recovery easier.
*/
for (i = 0; i < count; i++) {
status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask);
if (status)
goto process_extents;
}
if (lgr->range.offset + lgr->range.length !=
lv.start << SECTOR_SHIFT) {
dprintk("%s Final length mismatch\n", __func__);
status = -EIO;
goto process_extents;
}
if (lv.start < lv.cowread) {
dprintk("%s Final uncovered COW extent\n", __func__);
status = -EIO;
}
process_extents:
while (!list_empty(&extents)) {
struct pnfs_block_extent *be =
list_first_entry(&extents, struct pnfs_block_extent,
be_list);
list_del(&be->be_list);
if (!status)
status = ext_tree_insert(bl, be);
if (status) {
nfs4_put_deviceid_node(be->be_device);
kfree(be);
}
}
out_free_scratch:
__free_page(scratch);
out:
dprintk("%s returns %d\n", __func__, status);
if (status) {
kfree(lseg);
return ERR_PTR(status);
}
return lseg;
}
static void
bl_return_range(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range)
{
struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
sector_t offset = range->offset >> SECTOR_SHIFT, end;
if (range->offset % 8) {
dprintk("%s: offset %lld not block size aligned\n",
__func__, range->offset);
return;
}
if (range->length != NFS4_MAX_UINT64) {
if (range->length % 8) {
dprintk("%s: length %lld not block size aligned\n",
__func__, range->length);
return;
}
end = offset + (range->length >> SECTOR_SHIFT);
} else {
end = round_down(NFS4_MAX_UINT64, PAGE_SIZE);
}
ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end);
}
static int
bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
{
return ext_tree_prepare_commit(arg);
}
static void
bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
{
ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
}
static int
bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
{
dprintk("%s enter\n", __func__);
if (server->pnfs_blksize == 0) {
dprintk("%s Server did not return blksize\n", __func__);
return -EINVAL;
}
if (server->pnfs_blksize > PAGE_SIZE) {
printk(KERN_ERR "%s: pNFS blksize %d not supported.\n",
__func__, server->pnfs_blksize);
return -EINVAL;
}
return 0;
}
static bool
is_aligned_req(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, unsigned int alignment)
{
/*
* Always accept buffered writes, higher layers take care of the
* right alignment.
*/
if (pgio->pg_dreq == NULL)
return true;
if (!IS_ALIGNED(req->wb_offset, alignment))
return false;
if (IS_ALIGNED(req->wb_bytes, alignment))
return true;
if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
/*
* If the write goes up to the inode size, just write
* the full page. Data past the inode size is
* guaranteed to be zeroed by the higher level client
* code, and this behaviour is mandated by RFC 5663
* section 2.3.2.
*/
return true;
}
return false;
}
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
if (!is_aligned_req(pgio, req, SECTOR_SIZE)) {
nfs_pageio_reset_read_mds(pgio);
return;
}
pnfs_generic_pg_init_read(pgio, req);
}
/*
* Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
* of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (!is_aligned_req(pgio, req, SECTOR_SIZE))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
/*
* Return the number of contiguous bytes for a given inode
* starting at page frame idx.
*/
static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
{
struct address_space *mapping = inode->i_mapping;
pgoff_t end;
/* Optimize common case that writes from 0 to end of file */
end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
if (end != NFS_I(inode)->npages) {
rcu_read_lock();
end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
}
if (!end)
return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
else
return (end - idx) << PAGE_CACHE_SHIFT;
}
static void
bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 wb_size;
if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
nfs_pageio_reset_write_mds(pgio);
return;
}
if (pgio->pg_dreq == NULL)
wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
req->wb_index);
else
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pnfs_generic_pg_init_write(pgio, req, wb_size);
}
/*
* Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
* of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (!is_aligned_req(pgio, req, PAGE_SIZE))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
static const struct nfs_pageio_ops bl_pg_read_ops = {
.pg_init = bl_pg_init_read,
.pg_test = bl_pg_test_read,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops bl_pg_write_ops = {
.pg_init = bl_pg_init_write,
.pg_test = bl_pg_test_write,
.pg_doio = pnfs_generic_pg_writepages,
};
static struct pnfs_layoutdriver_type blocklayout_type = {
.id = LAYOUT_BLOCK_VOLUME,
.name = "LAYOUT_BLOCK_VOLUME",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
.alloc_layout_hdr = bl_alloc_layout_hdr,
.free_layout_hdr = bl_free_layout_hdr,
.alloc_lseg = bl_alloc_lseg,
.free_lseg = bl_free_lseg,
.return_range = bl_return_range,
.prepare_layoutcommit = bl_prepare_layoutcommit,
.cleanup_layoutcommit = bl_cleanup_layoutcommit,
.set_layoutdriver = bl_set_layoutdriver,
.alloc_deviceid_node = bl_alloc_deviceid_node,
.free_deviceid_node = bl_free_deviceid_node,
.pg_read_ops = &bl_pg_read_ops,
.pg_write_ops = &bl_pg_write_ops,
};
static int __init nfs4blocklayout_init(void)
{
int ret;
dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
ret = pnfs_register_layoutdriver(&blocklayout_type);
if (ret)
goto out;
ret = bl_init_pipefs();
if (ret)
goto out_unregister;
return 0;
out_unregister:
pnfs_unregister_layoutdriver(&blocklayout_type);
out:
return ret;
}
static void __exit nfs4blocklayout_exit(void)
{
dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
__func__);
bl_cleanup_pipefs();
pnfs_unregister_layoutdriver(&blocklayout_type);
}
MODULE_ALIAS("nfs-layouttype4-3");
module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);

View file

@ -0,0 +1,204 @@
/*
* linux/fs/nfs/blocklayout/blocklayout.h
*
* Module for the NFSv4.1 pNFS block layout driver.
*
* Copyright (c) 2006 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@citi.umich.edu>
* Fred Isaman <iisaman@umich.edu>
*
* permission is granted to use, copy, create derivative works and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the university of michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. if
* the above copyright notice or any other identification of the
* university of michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* this software is provided as is, without representation from the
* university of michigan as to its fitness for any purpose, and without
* warranty by the university of michigan of any kind, either express
* or implied, including without limitation the implied warranties of
* merchantability and fitness for a particular purpose. the regents
* of the university of michigan shall not be liable for any damages,
* including special, indirect, incidental, or consequential damages,
* with respect to any claim arising out or in connection with the use
* of the software, even if it has been or is hereafter advised of the
* possibility of such damages.
*/
#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
#define FS_NFS_NFS4BLOCKLAYOUT_H
#include <linux/device-mapper.h>
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct pnfs_block_dev;
enum pnfs_block_volume_type {
PNFS_BLOCK_VOLUME_SIMPLE = 0,
PNFS_BLOCK_VOLUME_SLICE = 1,
PNFS_BLOCK_VOLUME_CONCAT = 2,
PNFS_BLOCK_VOLUME_STRIPE = 3,
};
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
* Not actually limited by the protocol.
*/
#define PNFS_BLOCK_UUID_LEN 128
struct pnfs_block_volume {
enum pnfs_block_volume_type type;
union {
struct {
int len;
int nr_sigs;
struct {
u64 offset;
u32 sig_len;
u8 sig[PNFS_BLOCK_UUID_LEN];
} sigs[PNFS_BLOCK_MAX_UUIDS];
} simple;
struct {
u64 start;
u64 len;
u32 volume;
} slice;
struct {
u32 volumes_count;
u32 volumes[PNFS_BLOCK_MAX_DEVICES];
} concat;
struct {
u64 chunk_size;
u32 volumes_count;
u32 volumes[PNFS_BLOCK_MAX_DEVICES];
} stripe;
};
};
struct pnfs_block_dev_map {
sector_t start;
sector_t len;
sector_t disk_offset;
struct block_device *bdev;
};
struct pnfs_block_dev {
struct nfs4_deviceid_node node;
u64 start;
u64 len;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
struct block_device *bdev;
u64 disk_offset;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
enum exstate4 {
PNFS_BLOCK_READWRITE_DATA = 0,
PNFS_BLOCK_READ_DATA = 1,
PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */
PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */
};
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
struct rb_node be_node;
struct list_head be_list;
};
struct nfs4_deviceid_node *be_device;
sector_t be_f_offset; /* the starting offset in the file */
sector_t be_length; /* the size of the extent */
sector_t be_v_offset; /* the starting offset in the volume */
enum exstate4 be_state; /* the state of this extent */
#define EXTENT_WRITTEN 1
#define EXTENT_COMMITTING 2
unsigned int be_tag;
};
/* on the wire size of the extent */
#define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)
struct pnfs_block_layout {
struct pnfs_layout_hdr bl_layout;
struct rb_root bl_ext_rw;
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
};
static inline struct pnfs_block_layout *
BLK_LO2EXT(struct pnfs_layout_hdr *lo)
{
return container_of(lo, struct pnfs_block_layout, bl_layout);
}
static inline struct pnfs_block_layout *
BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
{
return BLK_LO2EXT(lseg->pls_layout);
}
struct bl_pipe_msg {
struct rpc_pipe_msg msg;
wait_queue_head_t *bl_wq;
};
struct bl_msg_hdr {
u8 type;
u16 totallen; /* length of entire message, including hdr itself */
};
#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/
#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
/* extent_tree.c */
int ext_tree_insert(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
sector_t end);
int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t len);
bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent *ret, bool rw);
int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
/* rpc_pipefs.c */
dev_t bl_resolve_deviceid(struct nfs_server *server,
struct pnfs_block_volume *b, gfp_t gfp_mask);
int __init bl_init_pipefs(void);
void __exit bl_cleanup_pipefs(void);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */

363
fs/nfs/blocklayout/dev.c Normal file
View file

@ -0,0 +1,363 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
static void
bl_free_device(struct pnfs_block_dev *dev)
{
if (dev->nr_children) {
int i;
for (i = 0; i < dev->nr_children; i++)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
if (dev->bdev)
blkdev_put(dev->bdev, FMODE_READ);
}
}
void
bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
struct pnfs_block_dev *dev =
container_of(d, struct pnfs_block_dev, node);
bl_free_device(dev);
kfree(dev);
}
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
__be32 *p;
int i;
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EIO;
b->type = be32_to_cpup(p++);
switch (b->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EIO;
b->simple.nr_sigs = be32_to_cpup(p++);
if (!b->simple.nr_sigs) {
dprintk("no signature\n");
return -EIO;
}
b->simple.len = 4 + 4;
for (i = 0; i < b->simple.nr_sigs; i++) {
p = xdr_inline_decode(xdr, 8 + 4);
if (!p)
return -EIO;
p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
b->simple.sigs[i].sig_len = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
if (!p)
return -EIO;
memcpy(&b->simple.sigs[i].sig, p,
b->simple.sigs[i].sig_len);
b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
}
break;
case PNFS_BLOCK_VOLUME_SLICE:
p = xdr_inline_decode(xdr, 8 + 8 + 4);
if (!p)
return -EIO;
p = xdr_decode_hyper(p, &b->slice.start);
p = xdr_decode_hyper(p, &b->slice.len);
b->slice.volume = be32_to_cpup(p++);
break;
case PNFS_BLOCK_VOLUME_CONCAT:
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EIO;
b->concat.volumes_count = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
if (!p)
return -EIO;
for (i = 0; i < b->concat.volumes_count; i++)
b->concat.volumes[i] = be32_to_cpup(p++);
break;
case PNFS_BLOCK_VOLUME_STRIPE:
p = xdr_inline_decode(xdr, 8 + 4);
if (!p)
return -EIO;
p = xdr_decode_hyper(p, &b->stripe.chunk_size);
b->stripe.volumes_count = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
if (!p)
return -EIO;
for (i = 0; i < b->stripe.volumes_count; i++)
b->stripe.volumes[i] = be32_to_cpup(p++);
break;
default:
dprintk("unknown volume type!\n");
return -EIO;
}
return 0;
}
static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map)
{
map->start = dev->start;
map->len = dev->len;
map->disk_offset = dev->disk_offset;
map->bdev = dev->bdev;
return true;
}
static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map)
{
int i;
for (i = 0; i < dev->nr_children; i++) {
struct pnfs_block_dev *child = &dev->children[i];
if (child->start > offset ||
child->start + child->len <= offset)
continue;
child->map(child, offset - child->start, map);
return true;
}
dprintk("%s: ran off loop!\n", __func__);
return false;
}
static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map)
{
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx > dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
__func__, chunk_idx, offset, dev->chunk_size);
/* error, should not happen */
return false;
}
/* truncate offset to the beginning of the stripe */
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
disk_offset = div_u64(offset, dev->nr_children);
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
map->start += offset;
map->disk_offset += disk_offset;
map->len = dev->chunk_size;
return true;
}
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
static int
bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
if (IS_ERR(d->bdev)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
return PTR_ERR(d->bdev);
}
d->len = i_size_read(d->bdev->bd_inode);
d->map = bl_map_simple;
printk(KERN_INFO "pNFS: using block device %s\n",
d->bdev->bd_disk->disk_name);
return 0;
}
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
if (ret)
return ret;
d->disk_offset = v->slice.start;
d->len = v->slice.len;
return 0;
}
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
int ret, i;
d->children = kcalloc(v->concat.volumes_count,
sizeof(struct pnfs_block_dev), GFP_KERNEL);
if (!d->children)
return -ENOMEM;
for (i = 0; i < v->concat.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i],
volumes, v->concat.volumes[i], gfp_mask);
if (ret)
return ret;
d->nr_children++;
d->children[i].start += len;
len += d->children[i].len;
}
d->len = len;
d->map = bl_map_concat;
return 0;
}
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
int ret, i;
d->children = kcalloc(v->stripe.volumes_count,
sizeof(struct pnfs_block_dev), GFP_KERNEL);
if (!d->children)
return -ENOMEM;
for (i = 0; i < v->stripe.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i],
volumes, v->stripe.volumes[i], gfp_mask);
if (ret)
return ret;
d->nr_children++;
len += d->children[i].len;
}
d->len = len;
d->chunk_size = v->stripe.chunk_size;
d->map = bl_map_stripe;
return 0;
}
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
switch (volumes[idx].type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
return bl_parse_slice(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
return bl_parse_concat(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
default:
dprintk("unsupported volume type: %d\n", volumes[idx].type);
return -EIO;
}
}
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node = NULL;
struct pnfs_block_volume *volumes;
struct pnfs_block_dev *top;
struct xdr_stream xdr;
struct xdr_buf buf;
struct page *scratch;
int nr_volumes, ret, i;
__be32 *p;
scratch = alloc_page(gfp_mask);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
p = xdr_inline_decode(&xdr, sizeof(__be32));
if (!p)
goto out_free_scratch;
nr_volumes = be32_to_cpup(p++);
volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
gfp_mask);
if (!volumes)
goto out_free_scratch;
for (i = 0; i < nr_volumes; i++) {
ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
if (ret < 0)
goto out_free_volumes;
}
top = kzalloc(sizeof(*top), gfp_mask);
if (!top)
goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
if (ret) {
bl_free_device(top);
kfree(top);
goto out_free_volumes;
}
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
out_free_volumes:
kfree(volumes);
out_free_scratch:
__free_page(scratch);
out:
return node;
}

View file

@ -0,0 +1,602 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
#include <linux/vmalloc.h>
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
static inline struct pnfs_block_extent *
ext_node(struct rb_node *node)
{
return rb_entry(node, struct pnfs_block_extent, be_node);
}
static struct pnfs_block_extent *
ext_tree_first(struct rb_root *root)
{
struct rb_node *node = rb_first(root);
return node ? ext_node(node) : NULL;
}
static struct pnfs_block_extent *
ext_tree_prev(struct pnfs_block_extent *be)
{
struct rb_node *node = rb_prev(&be->be_node);
return node ? ext_node(node) : NULL;
}
static struct pnfs_block_extent *
ext_tree_next(struct pnfs_block_extent *be)
{
struct rb_node *node = rb_next(&be->be_node);
return node ? ext_node(node) : NULL;
}
static inline sector_t
ext_f_end(struct pnfs_block_extent *be)
{
return be->be_f_offset + be->be_length;
}
static struct pnfs_block_extent *
__ext_tree_search(struct rb_root *root, sector_t start)
{
struct rb_node *node = root->rb_node;
struct pnfs_block_extent *be = NULL;
while (node) {
be = ext_node(node);
if (start < be->be_f_offset)
node = node->rb_left;
else if (start >= ext_f_end(be))
node = node->rb_right;
else
return be;
}
if (be) {
if (start < be->be_f_offset)
return be;
if (start >= ext_f_end(be))
return ext_tree_next(be);
}
return NULL;
}
static bool
ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2)
{
if (be1->be_state != be2->be_state)
return false;
if (be1->be_device != be2->be_device)
return false;
if (be1->be_f_offset + be1->be_length != be2->be_f_offset)
return false;
if (be1->be_state != PNFS_BLOCK_NONE_DATA &&
(be1->be_v_offset + be1->be_length != be2->be_v_offset))
return false;
if (be1->be_state == PNFS_BLOCK_INVALID_DATA &&
be1->be_tag != be2->be_tag)
return false;
return true;
}
static struct pnfs_block_extent *
ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be)
{
struct pnfs_block_extent *left = ext_tree_prev(be);
if (left && ext_can_merge(left, be)) {
left->be_length += be->be_length;
rb_erase(&be->be_node, root);
nfs4_put_deviceid_node(be->be_device);
kfree(be);
return left;
}
return be;
}
static struct pnfs_block_extent *
ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
{
struct pnfs_block_extent *right = ext_tree_next(be);
if (right && ext_can_merge(be, right)) {
be->be_length += right->be_length;
rb_erase(&right->be_node, root);
nfs4_put_deviceid_node(right->be_device);
kfree(right);
}
return be;
}
static void
__ext_tree_insert(struct rb_root *root,
struct pnfs_block_extent *new, bool merge_ok)
{
struct rb_node **p = &root->rb_node, *parent = NULL;
struct pnfs_block_extent *be;
while (*p) {
parent = *p;
be = ext_node(parent);
if (new->be_f_offset < be->be_f_offset) {
if (merge_ok && ext_can_merge(new, be)) {
be->be_f_offset = new->be_f_offset;
if (be->be_state != PNFS_BLOCK_NONE_DATA)
be->be_v_offset = new->be_v_offset;
be->be_length += new->be_length;
be = ext_try_to_merge_left(root, be);
goto free_new;
}
p = &(*p)->rb_left;
} else if (new->be_f_offset >= ext_f_end(be)) {
if (merge_ok && ext_can_merge(be, new)) {
be->be_length += new->be_length;
be = ext_try_to_merge_right(root, be);
goto free_new;
}
p = &(*p)->rb_right;
} else {
BUG();
}
}
rb_link_node(&new->be_node, parent, p);
rb_insert_color(&new->be_node, root);
return;
free_new:
nfs4_put_deviceid_node(new->be_device);
kfree(new);
}
static int
__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
{
struct pnfs_block_extent *be;
sector_t len1 = 0, len2 = 0;
sector_t orig_v_offset;
sector_t orig_len;
be = __ext_tree_search(root, start);
if (!be)
return 0;
if (be->be_f_offset >= end)
return 0;
orig_v_offset = be->be_v_offset;
orig_len = be->be_length;
if (start > be->be_f_offset)
len1 = start - be->be_f_offset;
if (ext_f_end(be) > end)
len2 = ext_f_end(be) - end;
if (len2 > 0) {
if (len1 > 0) {
struct pnfs_block_extent *new;
new = kzalloc(sizeof(*new), GFP_ATOMIC);
if (!new)
return -ENOMEM;
be->be_length = len1;
new->be_f_offset = end;
if (be->be_state != PNFS_BLOCK_NONE_DATA) {
new->be_v_offset =
orig_v_offset + orig_len - len2;
}
new->be_length = len2;
new->be_state = be->be_state;
new->be_tag = be->be_tag;
new->be_device = nfs4_get_deviceid(be->be_device);
__ext_tree_insert(root, new, true);
} else {
be->be_f_offset = end;
if (be->be_state != PNFS_BLOCK_NONE_DATA) {
be->be_v_offset =
orig_v_offset + orig_len - len2;
}
be->be_length = len2;
}
} else {
if (len1 > 0) {
be->be_length = len1;
be = ext_tree_next(be);
}
while (be && ext_f_end(be) <= end) {
struct pnfs_block_extent *next = ext_tree_next(be);
rb_erase(&be->be_node, root);
nfs4_put_deviceid_node(be->be_device);
kfree(be);
be = next;
}
if (be && be->be_f_offset < end) {
len1 = ext_f_end(be) - end;
be->be_f_offset = end;
if (be->be_state != PNFS_BLOCK_NONE_DATA)
be->be_v_offset += be->be_length - len1;
be->be_length = len1;
}
}
return 0;
}
int
ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new)
{
struct pnfs_block_extent *be;
struct rb_root *root;
int err = 0;
switch (new->be_state) {
case PNFS_BLOCK_READWRITE_DATA:
case PNFS_BLOCK_INVALID_DATA:
root = &bl->bl_ext_rw;
break;
case PNFS_BLOCK_READ_DATA:
case PNFS_BLOCK_NONE_DATA:
root = &bl->bl_ext_ro;
break;
default:
dprintk("invalid extent type\n");
return -EINVAL;
}
spin_lock(&bl->bl_ext_lock);
retry:
be = __ext_tree_search(root, new->be_f_offset);
if (!be || be->be_f_offset >= ext_f_end(new)) {
__ext_tree_insert(root, new, true);
} else if (new->be_f_offset >= be->be_f_offset) {
if (ext_f_end(new) <= ext_f_end(be)) {
nfs4_put_deviceid_node(new->be_device);
kfree(new);
} else {
sector_t new_len = ext_f_end(new) - ext_f_end(be);
sector_t diff = new->be_length - new_len;
new->be_f_offset += diff;
new->be_v_offset += diff;
new->be_length = new_len;
goto retry;
}
} else if (ext_f_end(new) <= ext_f_end(be)) {
new->be_length = be->be_f_offset - new->be_f_offset;
__ext_tree_insert(root, new, true);
} else {
struct pnfs_block_extent *split;
sector_t new_len = ext_f_end(new) - ext_f_end(be);
sector_t diff = new->be_length - new_len;
split = kmemdup(new, sizeof(*new), GFP_ATOMIC);
if (!split) {
err = -EINVAL;
goto out;
}
split->be_length = be->be_f_offset - split->be_f_offset;
split->be_device = nfs4_get_deviceid(new->be_device);
__ext_tree_insert(root, split, true);
new->be_f_offset += diff;
new->be_v_offset += diff;
new->be_length = new_len;
goto retry;
}
out:
spin_unlock(&bl->bl_ext_lock);
return err;
}
static bool
__ext_tree_lookup(struct rb_root *root, sector_t isect,
struct pnfs_block_extent *ret)
{
struct rb_node *node;
struct pnfs_block_extent *be;
node = root->rb_node;
while (node) {
be = ext_node(node);
if (isect < be->be_f_offset)
node = node->rb_left;
else if (isect >= ext_f_end(be))
node = node->rb_right;
else {
*ret = *be;
return true;
}
}
return false;
}
bool
ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent *ret, bool rw)
{
bool found = false;
spin_lock(&bl->bl_ext_lock);
if (!rw)
found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret);
if (!found)
found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret);
spin_unlock(&bl->bl_ext_lock);
return found;
}
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
sector_t start, sector_t end)
{
int err, err2;
spin_lock(&bl->bl_ext_lock);
err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
if (rw) {
err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
if (!err)
err = err2;
}
spin_unlock(&bl->bl_ext_lock);
return err;
}
static int
ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
sector_t split)
{
struct pnfs_block_extent *new;
sector_t orig_len = be->be_length;
new = kzalloc(sizeof(*new), GFP_ATOMIC);
if (!new)
return -ENOMEM;
be->be_length = split - be->be_f_offset;
new->be_f_offset = split;
if (be->be_state != PNFS_BLOCK_NONE_DATA)
new->be_v_offset = be->be_v_offset + be->be_length;
new->be_length = orig_len - be->be_length;
new->be_state = be->be_state;
new->be_tag = be->be_tag;
new->be_device = nfs4_get_deviceid(be->be_device);
__ext_tree_insert(root, new, false);
return 0;
}
int
ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t len)
{
struct rb_root *root = &bl->bl_ext_rw;
sector_t end = start + len;
struct pnfs_block_extent *be;
int err = 0;
spin_lock(&bl->bl_ext_lock);
/*
* First remove all COW extents or holes from written to range.
*/
err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
if (err)
goto out;
/*
* Then mark all invalid extents in the range as written to.
*/
for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) {
if (be->be_f_offset >= end)
break;
if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag)
continue;
if (be->be_f_offset < start) {
struct pnfs_block_extent *left = ext_tree_prev(be);
if (left && ext_can_merge(left, be)) {
sector_t diff = start - be->be_f_offset;
left->be_length += diff;
be->be_f_offset += diff;
be->be_v_offset += diff;
be->be_length -= diff;
} else {
err = ext_tree_split(root, be, start);
if (err)
goto out;
}
}
if (ext_f_end(be) > end) {
struct pnfs_block_extent *right = ext_tree_next(be);
if (right && ext_can_merge(be, right)) {
sector_t diff = end - be->be_f_offset;
be->be_length -= diff;
right->be_f_offset -= diff;
right->be_v_offset -= diff;
right->be_length += diff;
} else {
err = ext_tree_split(root, be, end);
if (err)
goto out;
}
}
if (be->be_f_offset >= start && ext_f_end(be) <= end) {
be->be_tag = EXTENT_WRITTEN;
be = ext_try_to_merge_left(root, be);
be = ext_try_to_merge_right(root, be);
}
}
out:
spin_unlock(&bl->bl_ext_lock);
return err;
}
static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
size_t buffer_size)
{
if (arg->layoutupdate_pages != &arg->layoutupdate_page) {
int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i;
for (i = 0; i < nr_pages; i++)
put_page(arg->layoutupdate_pages[i]);
kfree(arg->layoutupdate_pages);
} else {
put_page(arg->layoutupdate_page);
}
}
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count)
{
struct pnfs_block_extent *be;
int ret = 0;
spin_lock(&bl->bl_ext_lock);
for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
be->be_tag != EXTENT_WRITTEN)
continue;
(*count)++;
if (*count * BL_EXTENT_SIZE > buffer_size) {
/* keep counting.. */
ret = -ENOSPC;
continue;
}
p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
NFS4_DEVICEID4_SIZE);
p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
p = xdr_encode_hyper(p, 0LL);
*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
be->be_tag = EXTENT_COMMITTING;
}
spin_unlock(&bl->bl_ext_lock);
return ret;
}
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
size_t count = 0, buffer_size = PAGE_SIZE;
__be32 *start_p;
int ret;
dprintk("%s enter\n", __func__);
arg->layoutupdate_page = alloc_page(GFP_NOFS);
if (!arg->layoutupdate_page)
return -ENOMEM;
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
retry:
ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count;
count = 0;
arg->layoutupdate_pages =
kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE),
sizeof(struct page *), GFP_NOFS);
if (!arg->layoutupdate_pages)
return -ENOMEM;
start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL);
if (!start_p) {
kfree(arg->layoutupdate_pages);
return -ENOMEM;
}
goto retry;
}
*start_p = cpu_to_be32(count);
arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count;
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
__be32 *p = start_p;
int i = 0;
for (p = start_p;
p < start_p + arg->layoutupdate_len;
p += PAGE_SIZE) {
arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
}
}
dprintk("%s found %zu ranges\n", __func__, count);
return 0;
}
void
ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status)
{
struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
struct rb_root *root = &bl->bl_ext_rw;
struct pnfs_block_extent *be;
dprintk("%s status %d\n", __func__, status);
ext_tree_free_commitdata(arg, arg->layoutupdate_len);
spin_lock(&bl->bl_ext_lock);
for (be = ext_tree_first(root); be; be = ext_tree_next(be)) {
if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
be->be_tag != EXTENT_COMMITTING)
continue;
if (status) {
/*
* Mark as written and try again.
*
* XXX: some real error handling here wouldn't hurt..
*/
be->be_tag = EXTENT_WRITTEN;
} else {
be->be_state = PNFS_BLOCK_READWRITE_DATA;
be->be_tag = 0;
}
be = ext_try_to_merge_left(root, be);
be = ext_try_to_merge_right(root, be);
}
spin_unlock(&bl->bl_ext_lock);
}

View file

@ -0,0 +1,288 @@
/*
* Copyright (c) 2006,2007 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@citi.umich.edu>
* Fred Isaman <iisaman@umich.edu>
*
* permission is granted to use, copy, create derivative works and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the university of michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. if
* the above copyright notice or any other identification of the
* university of michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* this software is provided as is, without representation from the
* university of michigan as to its fitness for any purpose, and without
* warranty by the university of michigan of any kind, either express
* or implied, including without limitation the implied warranties of
* merchantability and fitness for a particular purpose. the regents
* of the university of michigan shall not be liable for any damages,
* including special, indirect, incidental, or consequential damages,
* with respect to any claim arising out or in connection with the use
* of the software, even if it has been or is hereafter advised of the
* possibility of such damages.
*/
#include <linux/module.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
static void
nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b)
{
int i;
*p++ = cpu_to_be32(1);
*p++ = cpu_to_be32(b->type);
*p++ = cpu_to_be32(b->simple.nr_sigs);
for (i = 0; i < b->simple.nr_sigs; i++) {
p = xdr_encode_hyper(p, b->simple.sigs[i].offset);
p = xdr_encode_opaque(p, b->simple.sigs[i].sig,
b->simple.sigs[i].sig_len);
}
}
dev_t
bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
gfp_t gfp_mask)
{
struct net *net = server->nfs_client->cl_net;
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct bl_dev_msg *reply = &nn->bl_mount_reply;
struct bl_pipe_msg bl_pipe_msg;
struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
struct bl_msg_hdr *bl_msg;
DECLARE_WAITQUEUE(wq, current);
dev_t dev = 0;
int rc;
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
mutex_lock(&nn->bl_mutex);
bl_pipe_msg.bl_wq = &nn->bl_wq;
b->simple.len += 4; /* single volume */
if (b->simple.len > PAGE_SIZE)
goto out_unlock;
memset(msg, 0, sizeof(*msg));
msg->len = sizeof(*bl_msg) + b->simple.len;
msg->data = kzalloc(msg->len, gfp_mask);
if (!msg->data)
goto out_free_data;
bl_msg = msg->data;
bl_msg->type = BL_DEVICE_MOUNT,
bl_msg->totallen = b->simple.len;
nfs4_encode_simple(msg->data + sizeof(*bl_msg), b);
dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
add_wait_queue(&nn->bl_wq, &wq);
rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
if (rc < 0) {
remove_wait_queue(&nn->bl_wq, &wq);
goto out_free_data;
}
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
remove_wait_queue(&nn->bl_wq, &wq);
if (reply->status != BL_DEVICE_REQUEST_PROC) {
printk(KERN_WARNING "%s failed to decode device: %d\n",
__func__, reply->status);
goto out_free_data;
}
dev = MKDEV(reply->major, reply->minor);
out_free_data:
kfree(msg->data);
out_unlock:
mutex_unlock(&nn->bl_mutex);
return dev;
}
static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
size_t mlen)
{
struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
nfs_net_id);
if (mlen != sizeof (struct bl_dev_msg))
return -EINVAL;
if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
return -EFAULT;
wake_up(&nn->bl_wq);
return mlen;
}
static void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct bl_pipe_msg *bl_pipe_msg =
container_of(msg, struct bl_pipe_msg, msg);
if (msg->errno >= 0)
return;
wake_up(bl_pipe_msg->bl_wq);
}
static const struct rpc_pipe_ops bl_upcall_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = bl_pipe_downcall,
.destroy_msg = bl_pipe_destroy_msg,
};
static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
struct rpc_pipe *pipe)
{
struct dentry *dir, *dentry;
dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
if (dir == NULL)
return ERR_PTR(-ENOENT);
dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
dput(dir);
return dentry;
}
static void nfs4blocklayout_unregister_sb(struct super_block *sb,
struct rpc_pipe *pipe)
{
if (pipe->dentry)
rpc_unlink(pipe->dentry);
}
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct super_block *sb = ptr;
struct net *net = sb->s_fs_info;
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct dentry *dentry;
int ret = 0;
if (!try_module_get(THIS_MODULE))
return 0;
if (nn->bl_device_pipe == NULL) {
module_put(THIS_MODULE);
return 0;
}
switch (event) {
case RPC_PIPEFS_MOUNT:
dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
break;
}
nn->bl_device_pipe->dentry = dentry;
break;
case RPC_PIPEFS_UMOUNT:
if (nn->bl_device_pipe->dentry)
nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
break;
default:
ret = -ENOTSUPP;
break;
}
module_put(THIS_MODULE);
return ret;
}
static struct notifier_block nfs4blocklayout_block = {
.notifier_call = rpc_pipefs_event,
};
static struct dentry *nfs4blocklayout_register_net(struct net *net,
struct rpc_pipe *pipe)
{
struct super_block *pipefs_sb;
struct dentry *dentry;
pipefs_sb = rpc_get_sb_net(net);
if (!pipefs_sb)
return NULL;
dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
rpc_put_sb_net(net);
return dentry;
}
static void nfs4blocklayout_unregister_net(struct net *net,
struct rpc_pipe *pipe)
{
struct super_block *pipefs_sb;
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
rpc_put_sb_net(net);
}
}
static int nfs4blocklayout_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct dentry *dentry;
mutex_init(&nn->bl_mutex);
init_waitqueue_head(&nn->bl_wq);
nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
if (IS_ERR(nn->bl_device_pipe))
return PTR_ERR(nn->bl_device_pipe);
dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
if (IS_ERR(dentry)) {
rpc_destroy_pipe_data(nn->bl_device_pipe);
return PTR_ERR(dentry);
}
nn->bl_device_pipe->dentry = dentry;
return 0;
}
static void nfs4blocklayout_net_exit(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
rpc_destroy_pipe_data(nn->bl_device_pipe);
nn->bl_device_pipe = NULL;
}
static struct pernet_operations nfs4blocklayout_net_ops = {
.init = nfs4blocklayout_net_init,
.exit = nfs4blocklayout_net_exit,
};
int __init bl_init_pipefs(void)
{
int ret;
ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
if (ret)
goto out;
ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
if (ret)
goto out_unregister_notifier;
return 0;
out_unregister_notifier:
rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
out:
return ret;
}
void __exit bl_cleanup_pipefs(void)
{
rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
unregister_pernet_subsys(&nfs4blocklayout_net_ops);
}

158
fs/nfs/cache_lib.c Normal file
View file

@ -0,0 +1,158 @@
/*
* linux/fs/nfs/cache_lib.c
*
* Helper routines for the NFS client caches
*
* Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <net/net_namespace.h>
#include "cache_lib.h"
#define NFS_CACHE_UPCALL_PATHLEN 256
#define NFS_CACHE_UPCALL_TIMEOUT 15
static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] =
"/sbin/nfs_cache_getent";
static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT;
module_param_string(cache_getent, nfs_cache_getent_prog,
sizeof(nfs_cache_getent_prog), 0600);
MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program");
module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600);
MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which "
"the cache upcall is assumed to have failed");
int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
{
static char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL
};
char *argv[] = {
nfs_cache_getent_prog,
cd->name,
entry_name,
NULL
};
int ret = -EACCES;
if (nfs_cache_getent_prog[0] == '\0')
goto out;
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
/*
* Disable the upcall mechanism if we're getting an ENOENT or
* EACCES error. The admin can re-enable it on the fly by using
* sysfs to set the 'cache_getent' parameter once the problem
* has been fixed.
*/
if (ret == -ENOENT || ret == -EACCES)
nfs_cache_getent_prog[0] = '\0';
out:
return ret > 0 ? 0 : ret;
}
/*
* Deferred request handling
*/
void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
{
if (atomic_dec_and_test(&dreq->count))
kfree(dreq);
}
static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany)
{
struct nfs_cache_defer_req *dreq;
dreq = container_of(d, struct nfs_cache_defer_req, deferred_req);
complete_all(&dreq->completion);
nfs_cache_defer_req_put(dreq);
}
static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
{
struct nfs_cache_defer_req *dreq;
dreq = container_of(req, struct nfs_cache_defer_req, req);
dreq->deferred_req.revisit = nfs_dns_cache_revisit;
atomic_inc(&dreq->count);
return &dreq->deferred_req;
}
struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
{
struct nfs_cache_defer_req *dreq;
dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
if (dreq) {
init_completion(&dreq->completion);
atomic_set(&dreq->count, 1);
dreq->req.defer = nfs_dns_cache_defer;
}
return dreq;
}
int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
{
if (wait_for_completion_timeout(&dreq->completion,
nfs_cache_getent_timeout * HZ) == 0)
return -ETIMEDOUT;
return 0;
}
int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd)
{
int ret;
struct dentry *dir;
dir = rpc_d_lookup_sb(sb, "cache");
ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
dput(dir);
return ret;
}
int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
{
struct super_block *pipefs_sb;
int ret = 0;
sunrpc_init_cache_detail(cd);
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
ret = nfs_cache_register_sb(pipefs_sb, cd);
rpc_put_sb_net(net);
if (ret)
sunrpc_destroy_cache_detail(cd);
}
return ret;
}
void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
{
if (cd->u.pipefs.dir)
sunrpc_cache_unregister_pipefs(cd);
}
void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
{
struct super_block *pipefs_sb;
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
nfs_cache_unregister_sb(pipefs_sb, cd);
rpc_put_sb_net(net);
}
sunrpc_destroy_cache_detail(cd);
}

31
fs/nfs/cache_lib.h Normal file
View file

@ -0,0 +1,31 @@
/*
* Helper routines for the NFS client caches
*
* Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/completion.h>
#include <linux/sunrpc/cache.h>
#include <linux/atomic.h>
/*
* Deferred request handling
*/
struct nfs_cache_defer_req {
struct cache_req req;
struct cache_deferred_req deferred_req;
struct completion completion;
atomic_t count;
};
extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
extern int nfs_cache_register_sb(struct super_block *sb,
struct cache_detail *cd);
extern void nfs_cache_unregister_sb(struct super_block *sb,
struct cache_detail *cd);

499
fs/nfs/callback.c Normal file
View file

@ -0,0 +1,499 @@
/*
* linux/fs/nfs/callback.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback handling
*/
#include <linux/completion.h>
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfs_fs.h>
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/bc_xprt.h>
#include <net/inet_sock.h>
#include "nfs4_fs.h"
#include "callback.h"
#include "internal.h"
#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
struct nfs_callback_data {
unsigned int users;
struct svc_serv *serv;
struct svc_rqst *rqst;
struct task_struct *task;
};
static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1];
static DEFINE_MUTEX(nfs_callback_mutex);
static struct svc_program nfs4_callback_program;
static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
{
int ret;
struct nfs_net *nn = net_generic(net, nfs_net_id);
ret = svc_create_xprt(serv, "tcp", net, PF_INET,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
nn->nfs_callback_tcpport, PF_INET, net);
ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
nn->nfs_callback_tcpport6, PF_INET6, net);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
return 0;
out_err:
return (ret) ? ret : -ENOMEM;
}
/*
* This is the NFSv4 callback kernel thread.
*/
static int
nfs4_callback_svc(void *vrqstp)
{
int err;
struct svc_rqst *rqstp = vrqstp;
set_freezable();
while (!kthread_should_stop()) {
/*
* Listen for a request on the socket
*/
err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
if (err == -EAGAIN || err == -EINTR)
continue;
svc_process(rqstp);
}
return 0;
}
/*
* Prepare to bring up the NFSv4 callback service
*/
static struct svc_rqst *
nfs4_callback_up(struct svc_serv *serv)
{
return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
{
/*
* Create an svc_sock for the back channel service that shares the
* fore channel connection.
* Returns the input port (0) and sets the svc_serv bc_xprt on success
*/
return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
SVC_SOCK_ANONYMOUS);
}
/*
* The callback service for NFSv4.1 callbacks
*/
static int
nfs41_callback_svc(void *vrqstp)
{
struct svc_rqst *rqstp = vrqstp;
struct svc_serv *serv = rqstp->rq_server;
struct rpc_rqst *req;
int error;
DEFINE_WAIT(wq);
set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
continue;
prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
req = list_first_entry(&serv->sv_cb_list,
struct rpc_rqst, rq_bc_list);
list_del(&req->rq_bc_list);
spin_unlock_bh(&serv->sv_cb_lock);
finish_wait(&serv->sv_cb_waitq, &wq);
dprintk("Invoking bc_svc_process()\n");
error = bc_svc_process(serv, req, rqstp);
dprintk("bc_svc_process() returned w/ error code= %d\n",
error);
} else {
spin_unlock_bh(&serv->sv_cb_lock);
/* schedule_timeout to game the hung task watchdog */
schedule_timeout(60 * HZ);
finish_wait(&serv->sv_cb_waitq, &wq);
}
}
return 0;
}
/*
* Bring up the NFSv4.1 callback service
*/
static struct svc_rqst *
nfs41_callback_up(struct svc_serv *serv)
{
struct svc_rqst *rqstp;
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
init_waitqueue_head(&serv->sv_cb_waitq);
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
if (IS_ERR(rqstp)) {
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp));
return rqstp;
}
static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
{
*rqstpp = nfs41_callback_up(serv);
*callback_svc = nfs41_callback_svc;
}
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct svc_serv *serv)
{
if (minorversion)
/*
* Save the svc_serv in the transport so that it can
* be referenced when the session backchannel is initialized
*/
xprt->bc_serv = serv;
}
#else
static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
{
return 0;
}
static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
{
*rqstpp = ERR_PTR(-ENOTSUPP);
*callback_svc = ERR_PTR(-ENOTSUPP);
}
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct svc_serv *serv)
{
}
#endif /* CONFIG_NFS_V4_1 */
static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
struct svc_serv *serv)
{
struct svc_rqst *rqstp;
int (*callback_svc)(void *vrqstp);
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
int ret;
nfs_callback_bc_serv(minorversion, xprt, serv);
if (cb_info->task)
return 0;
switch (minorversion) {
case 0:
/* v4.0 callback setup */
rqstp = nfs4_callback_up(serv);
callback_svc = nfs4_callback_svc;
break;
default:
nfs_minorversion_callback_svc_setup(serv,
&rqstp, &callback_svc);
}
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
svc_sock_update_bufs(serv);
cb_info->serv = serv;
cb_info->rqst = rqstp;
cb_info->task = kthread_create(callback_svc, cb_info->rqst,
"nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task);
svc_exit_thread(cb_info->rqst);
cb_info->rqst = NULL;
cb_info->task = NULL;
return ret;
}
rqstp->rq_task = cb_info->task;
wake_up_process(cb_info->task);
dprintk("nfs_callback_up: service started\n");
return 0;
}
static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
if (--nn->cb_users[minorversion])
return;
dprintk("NFS: destroy per-net callback data; net=%p\n", net);
svc_shutdown_net(serv, net);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
int ret;
if (nn->cb_users[minorversion]++)
return 0;
dprintk("NFS: create per-net callback data; net=%p\n", net);
ret = svc_bind(serv, net);
if (ret < 0) {
printk(KERN_WARNING "NFS: bind callback service failed\n");
goto err_bind;
}
switch (minorversion) {
case 0:
ret = nfs4_callback_up_net(serv, net);
break;
case 1:
case 2:
ret = nfs41_callback_up_net(serv, net);
break;
default:
printk(KERN_ERR "NFS: unknown callback version: %d\n",
minorversion);
ret = -EINVAL;
break;
}
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
goto err_socks;
}
return 0;
err_socks:
svc_rpcb_cleanup(serv, net);
err_bind:
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %p\n", ret, net);
return ret;
}
static struct svc_serv *nfs_callback_create_svc(int minorversion)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
struct svc_serv *serv;
/*
* Check whether we're already up and running.
*/
if (cb_info->task) {
/*
* Note: increase service usage, because later in case of error
* svc_destroy() will be called.
*/
svc_get(cb_info->serv);
return cb_info->serv;
}
/*
* Sanity check: if there's no task,
* we should be the first user ...
*/
if (cb_info->users)
printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
cb_info->users);
serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
if (!serv) {
printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
return ERR_PTR(-ENOMEM);
}
/* As there is only one thread we need to over-ride the
* default maximum of 80 connections
*/
serv->sv_maxconn = 1024;
dprintk("nfs_callback_create_svc: service created\n");
return serv;
}
/*
* Bring up the callback thread if it is not already up.
*/
int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
{
struct svc_serv *serv;
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
int ret;
struct net *net = xprt->xprt_net;
mutex_lock(&nfs_callback_mutex);
serv = nfs_callback_create_svc(minorversion);
if (IS_ERR(serv)) {
ret = PTR_ERR(serv);
goto err_create;
}
ret = nfs_callback_up_net(minorversion, serv, net);
if (ret < 0)
goto err_net;
ret = nfs_callback_start_svc(minorversion, xprt, serv);
if (ret < 0)
goto err_start;
cb_info->users++;
/*
* svc_create creates the svc_serv with sv_nrthreads == 1, and then
* svc_prepare_thread increments that. So we need to call svc_destroy
* on both success and failure so that the refcount is 1 when the
* thread exits.
*/
err_net:
svc_destroy(serv);
err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
err_start:
nfs_callback_down_net(minorversion, serv, net);
dprintk("NFS: Couldn't create server thread; err = %d\n", ret);
goto err_net;
}
/*
* Kill the callback thread if it's no longer being used.
*/
void nfs_callback_down(int minorversion, struct net *net)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
mutex_lock(&nfs_callback_mutex);
nfs_callback_down_net(minorversion, cb_info->serv, net);
cb_info->users--;
if (cb_info->users == 0 && cb_info->task != NULL) {
kthread_stop(cb_info->task);
dprintk("nfs_callback_down: service stopped\n");
svc_exit_thread(cb_info->rqst);
dprintk("nfs_callback_down: service destroyed\n");
cb_info->serv = NULL;
cb_info->rqst = NULL;
cb_info->task = NULL;
}
mutex_unlock(&nfs_callback_mutex);
}
/* Boolean check of RPC_AUTH_GSS principal */
int
check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
{
char *p = rqstp->rq_cred.cr_principal;
if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
return 1;
/* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
if (clp->cl_minorversion != 0)
return 0;
/*
* It might just be a normal user principal, in which case
* userspace won't bother to tell us the name at all.
*/
if (p == NULL)
return 0;
/*
* Did we get the acceptor from userland during the SETCLIENID
* negotiation?
*/
if (clp->cl_acceptor)
return !strcmp(p, clp->cl_acceptor);
/*
* Otherwise try to verify it using the cl_hostname. Note that this
* doesn't work if a non-canonical hostname was used in the devname.
*/
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0)
return 0;
p += 4;
if (strcmp(p, clp->cl_hostname) != 0)
return 0;
return 1;
}
/*
* pg_authenticate method for nfsv4 callback threads.
*
* The authflavor has been negotiated, so an incorrect flavor is a server
* bug. Drop packets with incorrect authflavor.
*
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
*/
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
return SVC_DROP;
break;
case RPC_AUTH_GSS:
/* No RPC_AUTH_GSS support yet in NFSv4.1 */
if (svc_is_backchannel(rqstp))
return SVC_DROP;
}
return SVC_OK;
}
/*
* Define NFS4 callback program
*/
static struct svc_version *nfs4_callback_version[] = {
[1] = &nfs4_callback_version1,
[4] = &nfs4_callback_version4,
};
static struct svc_stat nfs4_callback_stats;
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
.pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
};

214
fs/nfs/callback.h Normal file
View file

@ -0,0 +1,214 @@
/*
* linux/fs/nfs/callback.h
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback definitions
*/
#ifndef __LINUX_FS_NFS_CALLBACK_H
#define __LINUX_FS_NFS_CALLBACK_H
#include <linux/sunrpc/svc.h>
#define NFS4_CALLBACK 0x40000000
#define NFS4_CALLBACK_XDRSIZE 2048
#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
enum nfs4_callback_procnum {
CB_NULL = 0,
CB_COMPOUND = 1,
};
enum nfs4_callback_opnum {
OP_CB_GETATTR = 3,
OP_CB_RECALL = 4,
/* Callback operations new to NFSv4.1 */
OP_CB_LAYOUTRECALL = 5,
OP_CB_NOTIFY = 6,
OP_CB_PUSH_DELEG = 7,
OP_CB_RECALL_ANY = 8,
OP_CB_RECALLABLE_OBJ_AVAIL = 9,
OP_CB_RECALL_SLOT = 10,
OP_CB_SEQUENCE = 11,
OP_CB_WANTS_CANCELLED = 12,
OP_CB_NOTIFY_LOCK = 13,
OP_CB_NOTIFY_DEVICEID = 14,
/* Callback operations new to NFSv4.2 */
OP_CB_OFFLOAD = 15,
OP_CB_ILLEGAL = 10044,
};
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
u32 slotid;
u32 minorversion;
struct net *net;
};
struct cb_compound_hdr_arg {
unsigned int taglen;
const char *tag;
unsigned int minorversion;
unsigned int cb_ident; /* v4.0 callback identifier */
unsigned nops;
};
struct cb_compound_hdr_res {
__be32 *status;
unsigned int taglen;
const char *tag;
__be32 *nops;
};
struct cb_getattrargs {
struct sockaddr *addr;
struct nfs_fh fh;
uint32_t bitmap[2];
};
struct cb_getattrres {
__be32 status;
uint32_t bitmap[2];
uint64_t size;
uint64_t change_attr;
struct timespec ctime;
struct timespec mtime;
};
struct cb_recallargs {
struct sockaddr *addr;
struct nfs_fh fh;
nfs4_stateid stateid;
uint32_t truncate;
};
#if defined(CONFIG_NFS_V4_1)
struct referring_call {
uint32_t rc_sequenceid;
uint32_t rc_slotid;
};
struct referring_call_list {
struct nfs4_sessionid rcl_sessionid;
uint32_t rcl_nrefcalls;
struct referring_call *rcl_refcalls;
};
struct cb_sequenceargs {
struct sockaddr *csa_addr;
struct nfs4_sessionid csa_sessionid;
uint32_t csa_sequenceid;
uint32_t csa_slotid;
uint32_t csa_highestslotid;
uint32_t csa_cachethis;
uint32_t csa_nrclists;
struct referring_call_list *csa_rclists;
};
struct cb_sequenceres {
__be32 csr_status;
struct nfs4_sessionid csr_sessionid;
uint32_t csr_sequenceid;
uint32_t csr_slotid;
uint32_t csr_highestslotid;
uint32_t csr_target_highestslotid;
};
extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_sequenceres *res,
struct cb_process_state *cps);
extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
#define RCA4_TYPE_MASK_DIR_DLG 2
#define RCA4_TYPE_MASK_FILE_LAYOUT 3
#define RCA4_TYPE_MASK_BLK_LAYOUT 4
#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
#define RCA4_TYPE_MASK_ALL 0xf31f
struct cb_recallanyargs {
struct sockaddr *craa_addr;
uint32_t craa_objs_to_keep;
uint32_t craa_type_mask;
};
extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
void *dummy,
struct cb_process_state *cps);
struct cb_recallslotargs {
struct sockaddr *crsa_addr;
uint32_t crsa_target_highest_slotid;
};
extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
void *dummy,
struct cb_process_state *cps);
struct cb_layoutrecallargs {
struct sockaddr *cbl_addr;
uint32_t cbl_recall_type;
uint32_t cbl_layout_type;
uint32_t cbl_layoutchanged;
union {
struct {
struct nfs_fh cbl_fh;
struct pnfs_layout_range cbl_range;
nfs4_stateid cbl_stateid;
};
struct nfs_fsid cbl_fsid;
};
};
extern __be32 nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps);
struct cb_devicenotifyitem {
uint32_t cbd_notify_type;
uint32_t cbd_layout_type;
struct nfs4_deviceid cbd_dev_id;
uint32_t cbd_immediate;
};
struct cb_devicenotifyargs {
int ndevs;
struct cb_devicenotifyitem *devs;
};
extern __be32 nfs4_callback_devicenotify(
struct cb_devicenotifyargs *args,
void *dummy, struct cb_process_state *cps);
#endif /* CONFIG_NFS_V4_1 */
extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_getattrres *res,
struct cb_process_state *cps);
extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
struct cb_process_state *cps);
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion, struct net *net);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
#endif /* CONFIG_NFS_V4 */
/*
* nfs41: Callbacks are expected to not cause substantial latency,
* so we limit their concurrency to 1 by setting up the maximum number
* of slots for the backchannel.
*/
#define NFS41_BC_MIN_CALLBACKS 1
#define NFS41_BC_MAX_CALLBACKS 1
extern unsigned int nfs_callback_set_tcpport;
extern unsigned short nfs_callback_tcpport;
#endif /* __LINUX_FS_NFS_CALLBACK_H */

552
fs/nfs/callback_proc.c Normal file
View file

@ -0,0 +1,552 @@
/*
* linux/fs/nfs/callback_proc.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback procedures
*/
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
#include "internal.h"
#include "pnfs.h"
#include "nfs4session.h"
#include "nfs4trace.h"
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_getattrres *res,
struct cb_process_state *cps)
{
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
dprintk_rcu("NFS: GETATTR callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL)
goto out;
nfsi = NFS_I(inode);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
if (nfsi->npages != 0)
res->change_attr++;
res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
args->bitmap[1];
res->status = 0;
out_iput:
rcu_read_unlock();
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
return res->status;
}
__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
struct cb_process_state *cps)
{
struct inode *inode;
__be32 res;
res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
dprintk_rcu("NFS: RECALL callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
res = htonl(NFS4ERR_BADHANDLE);
inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL)
goto out;
/* Set up a helper thread to actually return the delegation */
switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0:
res = 0;
break;
case -ENOENT:
res = htonl(NFS4ERR_BAD_STATEID);
break;
default:
res = htonl(NFS4ERR_RESOURCE);
}
trace_nfs4_recall_delegation(inode, -ntohl(res));
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
}
#if defined(CONFIG_NFS_V4_1)
/*
* Lookup a layout by filehandle.
*
* Note: gets a refcount on the layout hdr and on its respective inode.
* Caller must put the layout hdr and the inode.
*
* TODO: keep track of all layouts (and delegations) in a hash table
* hashed by filehandle.
*/
static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct nfs_server *server;
struct inode *ino;
struct pnfs_layout_hdr *lo;
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
continue;
if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
break;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
if (NFS_I(ino)->layout != lo) {
spin_unlock(&ino->i_lock);
iput(ino);
break;
}
pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
return lo;
}
}
return NULL;
}
static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct pnfs_layout_hdr *lo;
spin_lock(&clp->cl_lock);
rcu_read_lock();
lo = get_layout_by_fh_locked(clp, fh, stateid);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
return lo;
}
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct inode *ino;
struct pnfs_layout_hdr *lo;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);
lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
if (!lo)
goto out;
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);
pnfs_layoutcommit_inode(ino, false);
spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
&args->cbl_range)) {
rv = NFS4ERR_DELAY;
goto unlock;
}
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range);
}
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
iput(ino);
out:
return rv;
}
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
int stat;
if (args->cbl_recall_type == RETURN_FSID)
stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
else
stat = pnfs_destroy_layouts_byclid(clp, true);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;
}
static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
u32 res;
dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
if (args->cbl_recall_type == RETURN_FILE)
res = initiate_file_draining(clp, args);
else
res = initiate_bulk_draining(clp, args);
dprintk("%s returning %i\n", __func__, res);
return res;
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps)
{
u32 res;
dprintk("%s: -->\n", __func__);
if (cps->clp)
res = do_callback_layoutrecall(cps->clp, args);
else
res = NFS4ERR_OP_NOT_IN_SESSION;
dprintk("%s: exit with status = %d\n", __func__, res);
return cpu_to_be32(res);
}
static void pnfs_recall_all_layouts(struct nfs_client *clp)
{
struct cb_layoutrecallargs args;
/* Pretend we got a CB_LAYOUTRECALL(ALL) */
memset(&args, 0, sizeof(args));
args.cbl_recall_type = RETURN_ALL;
/* FIXME we ignore errors, what should we do? */
do_callback_layoutrecall(clp, &args);
}
__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
void *dummy, struct cb_process_state *cps)
{
int i;
__be32 res = 0;
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
dprintk("%s: -->\n", __func__);
if (!clp) {
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
goto out;
}
for (i = 0; i < args->ndevs; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];
if (!server ||
server->pnfs_curr_ld->id != dev->cbd_layout_type) {
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
if (server->pnfs_curr_ld &&
server->pnfs_curr_ld->id == dev->cbd_layout_type) {
rcu_read_unlock();
goto found;
}
rcu_read_unlock();
dprintk("%s: layout type %u not found\n",
__func__, dev->cbd_layout_type);
continue;
}
found:
nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
}
out:
kfree(args->devs);
dprintk("%s: exit with status = %u\n",
__func__, be32_to_cpu(res));
return res;
}
/*
* Validate the sequenceID sent by the server.
* Return success if the sequenceID is one more than what we last saw on
* this slot, accounting for wraparound. Increments the slot's sequence.
*
* We don't yet implement a duplicate request cache, instead we set the
* back channel ca_maxresponsesize_cached to zero. This is OK for now
* since we only currently implement idempotent callbacks anyway.
*
* We have a single slot backchannel at this time, so we don't bother
* checking the used_slots bit array on the table. The lower layer guarantees
* a single outstanding callback request at a time.
*/
static __be32
validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
{
struct nfs4_slot *slot;
dprintk("%s enter. slotid %u seqid %u\n",
__func__, args->csa_slotid, args->csa_sequenceid);
if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
return htonl(NFS4ERR_BADSLOT);
slot = tbl->slots + args->csa_slotid;
dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
/* Normal */
if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
slot->seq_nr++;
goto out_ok;
}
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
dprintk("%s seqid %u is a replay\n",
__func__, args->csa_sequenceid);
/* Signal process_op to set this error on next op */
if (args->csa_cachethis == 0)
return htonl(NFS4ERR_RETRY_UNCACHED_REP);
/* The ca_maxresponsesize_cached is 0 with no DRC */
else if (args->csa_cachethis == 1)
return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
}
/* Wraparound */
if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
slot->seq_nr = 1;
goto out_ok;
}
/* Misordered request */
return htonl(NFS4ERR_SEQ_MISORDERED);
out_ok:
tbl->highest_used_slotid = args->csa_slotid;
return htonl(NFS4_OK);
}
/*
* For each referring call triple, check the session's slot table for
* a match. If the slot is in use and the sequence numbers match, the
* client is still waiting for a response to the original request.
*/
static bool referring_call_exists(struct nfs_client *clp,
uint32_t nrclists,
struct referring_call_list *rclists)
{
bool status = 0;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
struct referring_call_list *rclist;
struct referring_call *ref;
/*
* XXX When client trunking is implemented, this becomes
* a session lookup from within the loop
*/
session = clp->cl_session;
tbl = &session->fc_slot_table;
for (i = 0; i < nrclists; i++) {
rclist = &rclists[i];
if (memcmp(session->sess_id.data,
rclist->rcl_sessionid.data,
NFS4_MAX_SESSIONID_LEN) != 0)
continue;
for (j = 0; j < rclist->rcl_nrefcalls; j++) {
ref = &rclist->rcl_refcalls[j];
dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
"slotid %u\n", __func__,
((u32 *)&rclist->rcl_sessionid.data)[0],
((u32 *)&rclist->rcl_sessionid.data)[1],
((u32 *)&rclist->rcl_sessionid.data)[2],
((u32 *)&rclist->rcl_sessionid.data)[3],
ref->rc_sequenceid, ref->rc_slotid);
spin_lock(&tbl->slot_tbl_lock);
status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
tbl->slots[ref->rc_slotid].seq_nr ==
ref->rc_sequenceid);
spin_unlock(&tbl->slot_tbl_lock);
if (status)
goto out;
}
}
out:
return status;
}
__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_sequenceres *res,
struct cb_process_state *cps)
{
struct nfs4_slot_table *tbl;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
clp = nfs4_find_client_sessionid(cps->net, args->csa_addr,
&args->csa_sessionid, cps->minorversion);
if (clp == NULL)
goto out;
tbl = &clp->cl_session->bc_slot_table;
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
spin_unlock(&tbl->slot_tbl_lock);
status = htonl(NFS4ERR_DELAY);
/* Return NFS4ERR_BADSESSION if we're draining the session
* in order to reset it.
*/
if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
status = htonl(NFS4ERR_BADSESSION);
goto out;
}
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
spin_unlock(&tbl->slot_tbl_lock);
if (status)
goto out;
cps->slotid = args->csa_slotid;
/*
* Check for pending referring calls. If a match is found, a
* related callback was received before the response to the original
* call.
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
goto out;
}
memcpy(&res->csr_sessionid, &args->csa_sessionid,
sizeof(res->csr_sessionid));
res->csr_sequenceid = args->csa_sequenceid;
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
out:
cps->clp = clp; /* put in nfs4_callback_compound */
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
cps->drc_status = status;
status = 0;
} else
res->csr_status = status;
trace_nfs4_cb_sequence(args, res, status);
dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
ntohl(status), ntohl(res->csr_status));
return status;
}
static bool
validate_bitmap_values(unsigned long mask)
{
return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
}
__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
struct cb_process_state *cps)
{
__be32 status;
fmode_t flags = 0;
status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk_rcu("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
status = cpu_to_be32(NFS4ERR_INVAL);
if (!validate_bitmap_values(args->craa_type_mask))
goto out;
status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags = FMODE_READ;
if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags |= FMODE_WRITE;
if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
&args->craa_type_mask))
pnfs_recall_all_layouts(cps->clp);
if (flags)
nfs_expire_unused_delegation_types(cps->clp, flags);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
/* Reduce the fore channel's max_slots to the target value */
__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
struct cb_process_state *cps)
{
struct nfs4_slot_table *fc_tbl;
__be32 status;
status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %u\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
args->crsa_target_highest_slotid);
fc_tbl = &cps->clp->cl_session->fc_slot_table;
status = htonl(NFS4_OK);
nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid);
nfs41_server_notify_target_slotid_update(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
#endif /* CONFIG_NFS_V4_1 */

1033
fs/nfs/callback_xdr.c Normal file

File diff suppressed because it is too large Load diff

1473
fs/nfs/client.c Normal file

File diff suppressed because it is too large Load diff

864
fs/nfs/delegation.c Normal file
View file

@ -0,0 +1,864 @@
/*
* linux/fs/nfs/delegation.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFS file delegation management
*
*/
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include "nfs4_fs.h"
#include "delegation.h"
#include "internal.h"
#include "nfs4trace.h"
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
if (delegation->cred) {
put_rpccred(delegation->cred);
delegation->cred = NULL;
}
kfree_rcu(delegation, rcu);
}
/**
* nfs_mark_delegation_referenced - set delegation's REFERENCED flag
* @delegation: delegation to process
*
*/
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL && (delegation->type & flags) == flags &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
}
rcu_read_unlock();
return ret;
}
/**
* nfs_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
int nfs4_have_delegation(struct inode *inode, fmode_t flags)
{
return nfs4_do_check_delegation(inode, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
int nfs4_check_delegation(struct inode *inode, fmode_t flags)
{
return nfs4_do_check_delegation(inode, flags, false);
}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
int status = 0;
if (inode->i_flock == NULL)
goto out;
/* Protect inode->i_flock using the i_lock */
spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file) != ctx)
continue;
spin_unlock(&inode->i_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0)
goto out;
spin_lock(&inode->i_lock);
}
spin_unlock(&inode->i_lock);
out:
return status;
}
static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
struct nfs4_state_owner *sp;
struct nfs4_state *state;
unsigned int seq;
int err;
again:
spin_lock(&inode->i_lock);
list_for_each_entry(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
if (!nfs4_valid_open_stateid(state))
continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(ctx, state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
if (err != 0)
return err;
goto again;
}
spin_unlock(&inode->i_lock);
return 0;
}
/**
* nfs_inode_reclaim_delegation - process a delegation reclaim request
* @inode: inode to process
* @cred: credential to use for request
* @res: new delegation state from server
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
struct nfs_openres *res)
{
struct nfs_delegation *delegation;
struct rpc_cred *oldcred = NULL;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL) {
nfs4_stateid_copy(&delegation->stateid, &res->delegation);
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
oldcred = delegation->cred;
delegation->cred = get_rpccred(cred);
clear_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags);
NFS_I(inode)->delegation_state = delegation->type;
spin_unlock(&delegation->lock);
rcu_read_unlock();
put_rpccred(oldcred);
trace_nfs4_reclaim_delegation(inode, res->delegation_type);
} else {
/* We appear to have raced with a delegation return. */
spin_unlock(&delegation->lock);
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, res);
}
} else {
rcu_read_unlock();
}
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
int res = 0;
if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
res = nfs4_proc_delegreturn(inode,
delegation->cred,
&delegation->stateid,
issync);
nfs_free_delegation(delegation);
return res;
}
static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation)
{
struct inode *inode = NULL;
spin_lock(&delegation->lock);
if (delegation->inode != NULL)
inode = igrab(delegation->inode);
spin_unlock(&delegation->lock);
return inode;
}
static struct nfs_delegation *
nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
{
struct nfs_delegation *ret = NULL;
struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
ret = delegation;
spin_unlock(&delegation->lock);
out:
return ret;
}
static struct nfs_delegation *
nfs_start_delegation_return(struct nfs_inode *nfsi)
{
struct nfs_delegation *delegation;
rcu_read_lock();
delegation = nfs_start_delegation_return_locked(nfsi);
rcu_read_unlock();
return delegation;
}
static void
nfs_abort_delegation_return(struct nfs_delegation *delegation,
struct nfs_client *clp)
{
spin_lock(&delegation->lock);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
spin_unlock(&delegation->lock);
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
}
static struct nfs_delegation *
nfs_detach_delegation_locked(struct nfs_inode *nfsi,
struct nfs_delegation *delegation,
struct nfs_client *clp)
{
struct nfs_delegation *deleg_cur =
rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (deleg_cur == NULL || delegation != deleg_cur)
return NULL;
spin_lock(&delegation->lock);
set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
nfsi->delegation_state = 0;
rcu_assign_pointer(nfsi->delegation, NULL);
spin_unlock(&delegation->lock);
return delegation;
}
static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
struct nfs_delegation *delegation,
struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
spin_unlock(&clp->cl_lock);
return delegation;
}
static struct nfs_delegation *
nfs_inode_detach_delegation(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_delegation *delegation;
delegation = nfs_start_delegation_return(nfsi);
if (delegation == NULL)
return NULL;
return nfs_detach_delegation(nfsi, delegation, server);
}
/**
* nfs_inode_set_delegation - set up a delegation on an inode
* @inode: inode to which delegation applies
* @cred: cred to use for subsequent delegation processing
* @res: new delegation state from server
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation, *old_delegation;
struct nfs_delegation *freeme = NULL;
int status = 0;
delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
if (delegation == NULL)
return -ENOMEM;
nfs4_stateid_copy(&delegation->stateid, &res->delegation);
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->change_attr = inode->i_version;
delegation->cred = get_rpccred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
spin_lock_init(&delegation->lock);
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (old_delegation != NULL) {
if (nfs4_stateid_match(&delegation->stateid,
&old_delegation->stateid) &&
delegation->type == old_delegation->type) {
goto out;
}
/*
* Deal with broken servers that hand out two
* delegations for the same file.
* Allow for upgrades to a WRITE delegation, but
* nothing else.
*/
dfprintk(FILE, "%s: server %s handed out "
"a duplicate delegation!\n",
__func__, clp->cl_hostname);
if (delegation->type == old_delegation->type ||
!(delegation->type & FMODE_WRITE)) {
freeme = delegation;
delegation = NULL;
goto out;
}
freeme = nfs_detach_delegation_locked(nfsi,
old_delegation, clp);
if (freeme == NULL)
goto out;
}
list_add_rcu(&delegation->super_list, &server->delegations);
nfsi->delegation_state = delegation->type;
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
/* Ensure we revalidate the attributes and page cache! */
spin_lock(&inode->i_lock);
nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock);
trace_nfs4_set_delegation(inode, res->delegation_type);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
nfs_free_delegation(delegation);
if (freeme != NULL)
nfs_do_return_delegation(inode, freeme, 0);
return status;
}
/*
* Basic procedure for returning a delegation to the server
*/
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
int err = 0;
if (delegation == NULL)
return 0;
do {
if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
break;
err = nfs_delegation_claim_opens(inode, &delegation->stateid);
if (!issync || err != -EAGAIN)
break;
/*
* Guard against state recovery
*/
err = nfs4_wait_clnt_recover(clp);
} while (err == 0);
if (err) {
nfs_abort_delegation_return(delegation, clp);
goto out;
}
if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
goto out;
err = nfs_do_return_delegation(inode, delegation, issync);
out:
return err;
}
static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
struct inode *inode;
spin_lock(&delegation->lock);
inode = delegation->inode;
if (inode && list_empty(&NFS_I(inode)->open_files))
ret = true;
spin_unlock(&delegation->lock);
}
return ret;
}
/**
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
*
* Note that this function is designed to be called by the state
* manager thread. For this reason, it cannot flush the dirty data,
* since that could deadlock in case of a state recovery error.
*
* Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
int err = 0;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (!nfs_delegation_need_return(delegation))
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
continue;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
if (!err)
goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
return err;
}
}
rcu_read_unlock();
return 0;
}
/**
* nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
* @inode: inode to process
*
* Does not protect against delegation reclaims, therefore really only safe
* to be called from nfs4_clear_inode().
*/
void nfs_inode_return_delegation_noreclaim(struct inode *inode)
{
struct nfs_delegation *delegation;
delegation = nfs_inode_detach_delegation(inode);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation, 0);
}
/**
* nfs_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
* This routine will always flush any dirty data to disk on the
* assumption that if we need to return the delegation, then
* we should stop caching.
*
* Returns zero on success, or a negative errno value.
*/
int nfs4_inode_return_delegation(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int err = 0;
nfs_wb_all(inode);
delegation = nfs_start_delegation_return(nfsi);
if (delegation != NULL)
err = nfs_end_delegation_return(inode, delegation, 1);
return err;
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
bool ret = false;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
nfs_mark_return_delegation(server, delegation);
ret = true;
}
return ret;
}
static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
}
static void nfs_delegation_run_state_manager(struct nfs_client *clp)
{
if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
nfs4_schedule_state_manager(clp);
}
/**
* nfs_expire_all_delegations
* @clp: client to process
*
*/
void nfs_expire_all_delegations(struct nfs_client *clp)
{
nfs_client_mark_return_all_delegations(clp);
nfs_delegation_run_state_manager(clp);
}
/**
* nfs_super_return_all_delegations - return delegations for one superblock
* @sb: sb to process
*
*/
void nfs_server_return_all_delegations(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
bool need_wait;
if (clp == NULL)
return;
rcu_read_lock();
need_wait = nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
if (need_wait) {
nfs4_schedule_state_manager(clp);
nfs4_wait_clnt_recover(clp);
}
}
static void nfs_mark_return_unused_delegation_types(struct nfs_server *server,
fmode_t flags)
{
struct nfs_delegation *delegation;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
nfs_mark_return_if_closed_delegation(server, delegation);
}
}
static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp,
fmode_t flags)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_mark_return_unused_delegation_types(server, flags);
rcu_read_unlock();
}
static void nfs_revoke_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
}
rcu_read_unlock();
}
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
nfs_revoke_delegation(inode);
delegation = nfs_inode_detach_delegation(inode);
if (delegation) {
nfs_inode_find_state_and_recover(inode, &delegation->stateid);
nfs_free_delegation(delegation);
}
}
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
/**
* nfs_expire_unused_delegation_types
* @clp: client to process
* @flags: delegation types to expire
*
*/
void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags)
{
nfs_client_mark_return_unused_delegation_types(clp, flags);
nfs_delegation_run_state_manager(clp);
}
static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
nfs_mark_return_if_closed_delegation(server, delegation);
}
}
/**
* nfs_expire_unreferenced_delegations - Eliminate unused delegations
* @clp: nfs_client to process
*
*/
void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_mark_return_unreferenced_delegations(server);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
}
/**
* nfs_async_inode_return_delegation - asynchronously return a delegation
* @inode: inode to process
* @stateid: state ID information
*
* Returns zero on success, or a negative errno value.
*/
int nfs_async_inode_return_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
filemap_flush(inode->i_mapping);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation == NULL)
goto out_enoent;
if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
out_enoent:
rcu_read_unlock();
return -ENOENT;
}
static struct inode *
nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
res = igrab(delegation->inode);
}
spin_unlock(&delegation->lock);
if (res != NULL)
break;
}
return res;
}
/**
* nfs_delegation_find_inode - retrieve the inode associated with a delegation
* @clp: client state handle
* @fhandle: filehandle from a delegation recall
*
* Returns pointer to inode matching "fhandle," or NULL if a matching inode
* cannot be found.
*/
struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
const struct nfs_fh *fhandle)
{
struct nfs_server *server;
struct inode *res = NULL;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
res = nfs_delegation_find_inode_server(server, fhandle);
if (res != NULL)
break;
}
rcu_read_unlock();
return res;
}
static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
{
struct nfs_delegation *delegation;
list_for_each_entry_rcu(delegation, &server->delegations, super_list)
set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
}
/**
* nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
* @clp: nfs_client to process
*
*/
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_delegation_mark_reclaim_server(server);
rcu_read_unlock();
}
/**
* nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
* @clp: nfs_client to process
*
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
continue;
delegation = nfs_detach_delegation(NFS_I(inode),
delegation, server);
rcu_read_unlock();
if (delegation != NULL)
nfs_free_delegation(delegation);
iput(inode);
goto restart;
}
}
rcu_read_unlock();
}
/**
* nfs_delegations_present - check for existence of delegations
* @clp: client state handle
*
* Returns one if there are any nfs_delegation structures attached
* to this nfs_client.
*/
int nfs_delegations_present(struct nfs_client *clp)
{
struct nfs_server *server;
int ret = 0;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
if (!list_empty(&server->delegations)) {
ret = 1;
break;
}
rcu_read_unlock();
return ret;
}
/**
* nfs4_copy_delegation_stateid - Copy inode's state ID information
* @dst: stateid data structure to fill in
* @inode: inode to check
* @flags: delegation type requirement
*
* Returns "true" and fills in "dst->data" * if inode had a delegation,
* otherwise "false" is returned.
*/
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
fmode_t flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
bool ret;
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
ret = (delegation != NULL && (delegation->type & flags) == flags);
if (ret) {
nfs4_stateid_copy(dst, &delegation->stateid);
nfs_mark_delegation_referenced(delegation);
}
rcu_read_unlock();
return ret;
}

73
fs/nfs/delegation.h Normal file
View file

@ -0,0 +1,73 @@
/*
* linux/fs/nfs/delegation.h
*
* Copyright (c) Trond Myklebust
*
* Definitions pertaining to NFS delegated files
*/
#ifndef FS_NFS_DELEGATION_H
#define FS_NFS_DELEGATION_H
#if IS_ENABLED(CONFIG_NFS_V4)
/*
* NFSv4 delegation
*/
struct nfs_delegation {
struct list_head super_list;
struct rpc_cred *cred;
struct inode *inode;
nfs4_stateid stateid;
fmode_t type;
loff_t maxsize;
__u64 change_attr;
unsigned long flags;
spinlock_t lock;
struct rcu_head rcu;
};
enum {
NFS_DELEGATION_NEED_RECLAIM = 0,
NFS_DELEGATION_RETURN,
NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
NFS_DELEGATION_RETURNING,
NFS_DELEGATION_REVOKED,
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
int nfs4_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_return_delegation_noreclaim(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
void nfs_expire_all_delegations(struct nfs_client *clp);
void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
void nfs_remove_bad_delegation(struct inode *inode);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
int nfs4_check_delegation(struct inode *inode, fmode_t flags);
#endif
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
!(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
}
#endif

2507
fs/nfs/dir.c Normal file

File diff suppressed because it is too large Load diff

1003
fs/nfs/direct.c Normal file

File diff suppressed because it is too large Load diff

470
fs/nfs/dns_resolve.c Normal file
View file

@ -0,0 +1,470 @@
/*
* linux/fs/nfs/dns_resolve.c
*
* Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
*
* Resolves DNS hostnames into valid ip addresses
*/
#ifdef CONFIG_NFS_USE_KERNEL_DNS
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/dns_resolver.h>
#include "dns_resolve.h"
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
struct sockaddr *sa, size_t salen)
{
ssize_t ret;
char *ip_addr = NULL;
int ip_len;
ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
if (ip_len > 0)
ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
ret = -ESRCH;
kfree(ip_addr);
return ret;
}
#else
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/seq_file.h>
#include <linux/inet.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
#define NFS_DNS_HASHBITS 4
#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
struct nfs_dns_ent {
struct cache_head h;
char *hostname;
size_t namelen;
struct sockaddr_storage addr;
size_t addrlen;
};
static void nfs_dns_ent_update(struct cache_head *cnew,
struct cache_head *ckey)
{
struct nfs_dns_ent *new;
struct nfs_dns_ent *key;
new = container_of(cnew, struct nfs_dns_ent, h);
key = container_of(ckey, struct nfs_dns_ent, h);
memcpy(&new->addr, &key->addr, key->addrlen);
new->addrlen = key->addrlen;
}
static void nfs_dns_ent_init(struct cache_head *cnew,
struct cache_head *ckey)
{
struct nfs_dns_ent *new;
struct nfs_dns_ent *key;
new = container_of(cnew, struct nfs_dns_ent, h);
key = container_of(ckey, struct nfs_dns_ent, h);
kfree(new->hostname);
new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
if (new->hostname) {
new->namelen = key->namelen;
nfs_dns_ent_update(cnew, ckey);
} else {
new->namelen = 0;
new->addrlen = 0;
}
}
static void nfs_dns_ent_put(struct kref *ref)
{
struct nfs_dns_ent *item;
item = container_of(ref, struct nfs_dns_ent, h.ref);
kfree(item->hostname);
kfree(item);
}
static struct cache_head *nfs_dns_ent_alloc(void)
{
struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL);
if (item != NULL) {
item->hostname = NULL;
item->namelen = 0;
item->addrlen = 0;
return &item->h;
}
return NULL;
};
static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key)
{
return hash_str(key->hostname, NFS_DNS_HASHBITS);
}
static void nfs_dns_request(struct cache_detail *cd,
struct cache_head *ch,
char **bpp, int *blen)
{
struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
qword_add(bpp, blen, key->hostname);
(*bpp)[-1] = '\n';
}
static int nfs_dns_upcall(struct cache_detail *cd,
struct cache_head *ch)
{
struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
int ret;
ret = nfs_cache_upcall(cd, key->hostname);
if (ret)
ret = sunrpc_cache_pipe_upcall(cd, ch);
return ret;
}
static int nfs_dns_match(struct cache_head *ca,
struct cache_head *cb)
{
struct nfs_dns_ent *a;
struct nfs_dns_ent *b;
a = container_of(ca, struct nfs_dns_ent, h);
b = container_of(cb, struct nfs_dns_ent, h);
if (a->namelen == 0 || a->namelen != b->namelen)
return 0;
return memcmp(a->hostname, b->hostname, a->namelen) == 0;
}
static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
struct cache_head *h)
{
struct nfs_dns_ent *item;
long ttl;
if (h == NULL) {
seq_puts(m, "# ip address hostname ttl\n");
return 0;
}
item = container_of(h, struct nfs_dns_ent, h);
ttl = item->h.expiry_time - seconds_since_boot();
if (ttl < 0)
ttl = 0;
if (!test_bit(CACHE_NEGATIVE, &h->flags)) {
char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1];
rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf));
seq_printf(m, "%15s ", buf);
} else
seq_puts(m, "<none> ");
seq_printf(m, "%15s %ld\n", item->hostname, ttl);
return 0;
}
static struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
struct nfs_dns_ent *key)
{
struct cache_head *ch;
ch = sunrpc_cache_lookup(cd,
&key->h,
nfs_dns_hash(key));
if (!ch)
return NULL;
return container_of(ch, struct nfs_dns_ent, h);
}
static struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
struct nfs_dns_ent *new,
struct nfs_dns_ent *key)
{
struct cache_head *ch;
ch = sunrpc_cache_update(cd,
&new->h, &key->h,
nfs_dns_hash(key));
if (!ch)
return NULL;
return container_of(ch, struct nfs_dns_ent, h);
}
static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
{
char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
struct nfs_dns_ent key, *item;
unsigned int ttl;
ssize_t len;
int ret = -EINVAL;
if (buf[buflen-1] != '\n')
goto out;
buf[buflen-1] = '\0';
len = qword_get(&buf, buf1, sizeof(buf1));
if (len <= 0)
goto out;
key.addrlen = rpc_pton(cd->net, buf1, len,
(struct sockaddr *)&key.addr,
sizeof(key.addr));
len = qword_get(&buf, buf1, sizeof(buf1));
if (len <= 0)
goto out;
key.hostname = buf1;
key.namelen = len;
memset(&key.h, 0, sizeof(key.h));
if (get_uint(&buf, &ttl) < 0)
goto out;
if (ttl == 0)
goto out;
key.h.expiry_time = ttl + seconds_since_boot();
ret = -ENOMEM;
item = nfs_dns_lookup(cd, &key);
if (item == NULL)
goto out;
if (key.addrlen == 0)
set_bit(CACHE_NEGATIVE, &key.h.flags);
item = nfs_dns_update(cd, &key, item);
if (item == NULL)
goto out;
ret = 0;
cache_put(&item->h, cd);
out:
return ret;
}
static int do_cache_lookup(struct cache_detail *cd,
struct nfs_dns_ent *key,
struct nfs_dns_ent **item,
struct nfs_cache_defer_req *dreq)
{
int ret = -ENOMEM;
*item = nfs_dns_lookup(cd, key);
if (*item) {
ret = cache_check(cd, &(*item)->h, &dreq->req);
if (ret)
*item = NULL;
}
return ret;
}
static int do_cache_lookup_nowait(struct cache_detail *cd,
struct nfs_dns_ent *key,
struct nfs_dns_ent **item)
{
int ret = -ENOMEM;
*item = nfs_dns_lookup(cd, key);
if (!*item)
goto out_err;
ret = -ETIMEDOUT;
if (!test_bit(CACHE_VALID, &(*item)->h.flags)
|| (*item)->h.expiry_time < seconds_since_boot()
|| cd->flush_time > (*item)->h.last_refresh)
goto out_put;
ret = -ENOENT;
if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
goto out_put;
return 0;
out_put:
cache_put(&(*item)->h, cd);
out_err:
*item = NULL;
return ret;
}
static int do_cache_lookup_wait(struct cache_detail *cd,
struct nfs_dns_ent *key,
struct nfs_dns_ent **item)
{
struct nfs_cache_defer_req *dreq;
int ret = -ENOMEM;
dreq = nfs_cache_defer_req_alloc();
if (!dreq)
goto out;
ret = do_cache_lookup(cd, key, item, dreq);
if (ret == -EAGAIN) {
ret = nfs_cache_wait_for_upcall(dreq);
if (!ret)
ret = do_cache_lookup_nowait(cd, key, item);
}
nfs_cache_defer_req_put(dreq);
out:
return ret;
}
ssize_t nfs_dns_resolve_name(struct net *net, char *name,
size_t namelen, struct sockaddr *sa, size_t salen)
{
struct nfs_dns_ent key = {
.hostname = name,
.namelen = namelen,
};
struct nfs_dns_ent *item = NULL;
ssize_t ret;
struct nfs_net *nn = net_generic(net, nfs_net_id);
ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
if (ret == 0) {
if (salen >= item->addrlen) {
memcpy(sa, &item->addr, item->addrlen);
ret = item->addrlen;
} else
ret = -EOVERFLOW;
cache_put(&item->h, nn->nfs_dns_resolve);
} else if (ret == -ENOENT)
ret = -ESRCH;
return ret;
}
static struct cache_detail nfs_dns_resolve_template = {
.owner = THIS_MODULE,
.hash_size = NFS_DNS_HASHTBL_SIZE,
.name = "dns_resolve",
.cache_put = nfs_dns_ent_put,
.cache_upcall = nfs_dns_upcall,
.cache_request = nfs_dns_request,
.cache_parse = nfs_dns_parse,
.cache_show = nfs_dns_show,
.match = nfs_dns_match,
.init = nfs_dns_ent_init,
.update = nfs_dns_ent_update,
.alloc = nfs_dns_ent_alloc,
};
int nfs_dns_resolver_cache_init(struct net *net)
{
int err;
struct nfs_net *nn = net_generic(net, nfs_net_id);
nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net);
if (IS_ERR(nn->nfs_dns_resolve))
return PTR_ERR(nn->nfs_dns_resolve);
err = nfs_cache_register_net(net, nn->nfs_dns_resolve);
if (err)
goto err_reg;
return 0;
err_reg:
cache_destroy_net(nn->nfs_dns_resolve, net);
return err;
}
void nfs_dns_resolver_cache_destroy(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
nfs_cache_unregister_net(net, nn->nfs_dns_resolve);
cache_destroy_net(nn->nfs_dns_resolve, net);
}
static int nfs4_dns_net_init(struct net *net)
{
return nfs_dns_resolver_cache_init(net);
}
static void nfs4_dns_net_exit(struct net *net)
{
nfs_dns_resolver_cache_destroy(net);
}
static struct pernet_operations nfs4_dns_resolver_ops = {
.init = nfs4_dns_net_init,
.exit = nfs4_dns_net_exit,
};
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct super_block *sb = ptr;
struct net *net = sb->s_fs_info;
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct cache_detail *cd = nn->nfs_dns_resolve;
int ret = 0;
if (cd == NULL)
return 0;
if (!try_module_get(THIS_MODULE))
return 0;
switch (event) {
case RPC_PIPEFS_MOUNT:
ret = nfs_cache_register_sb(sb, cd);
break;
case RPC_PIPEFS_UMOUNT:
nfs_cache_unregister_sb(sb, cd);
break;
default:
ret = -ENOTSUPP;
break;
}
module_put(THIS_MODULE);
return ret;
}
static struct notifier_block nfs_dns_resolver_block = {
.notifier_call = rpc_pipefs_event,
};
int nfs_dns_resolver_init(void)
{
int err;
err = register_pernet_subsys(&nfs4_dns_resolver_ops);
if (err < 0)
goto out;
err = rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
if (err < 0)
goto out1;
return 0;
out1:
unregister_pernet_subsys(&nfs4_dns_resolver_ops);
out:
return err;
}
void nfs_dns_resolver_destroy(void)
{
rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
unregister_pernet_subsys(&nfs4_dns_resolver_ops);
}
#endif

36
fs/nfs/dns_resolve.h Normal file
View file

@ -0,0 +1,36 @@
/*
* Resolve DNS hostnames into valid ip addresses
*/
#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H
#define __LINUX_FS_NFS_DNS_RESOLVE_H
#define NFS_DNS_HOSTNAME_MAXLEN (128)
#ifdef CONFIG_NFS_USE_KERNEL_DNS
static inline int nfs_dns_resolver_init(void)
{
return 0;
}
static inline void nfs_dns_resolver_destroy(void)
{}
static inline int nfs_dns_resolver_cache_init(struct net *net)
{
return 0;
}
static inline void nfs_dns_resolver_cache_destroy(struct net *net)
{}
#else
extern int nfs_dns_resolver_init(void);
extern void nfs_dns_resolver_destroy(void);
extern int nfs_dns_resolver_cache_init(struct net *net);
extern void nfs_dns_resolver_cache_destroy(struct net *net);
#endif
extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
size_t namelen, struct sockaddr *sa, size_t salen);
#endif

940
fs/nfs/file.c Normal file
View file

@ -0,0 +1,940 @@
/*
* linux/fs/nfs/file.c
*
* Copyright (C) 1992 Rick Sladkey
*
* Changes Copyright (C) 1994 by Florian La Roche
* - Do not copy data too often around in the kernel.
* - In nfs_file_read the return value of kmalloc wasn't checked.
* - Put in a better version of read look-ahead buffering. Original idea
* and implementation by Wai S Kok elekokws@ee.nus.sg.
*
* Expire cache on write to a file by Wai S Kok (Oct 1994).
*
* Total rewrite of read side for new NFS buffer cache.. Linus.
*
* nfs regular file handling functions
*/
#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/swap.h>
#include <asm/uaccess.h>
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_FILE
static const struct vm_operations_struct nfs_file_vm_ops;
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
int nfs_check_flags(int flags)
{
if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(nfs_check_flags);
/*
* Open file
*/
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
int res;
dprintk("NFS: open file(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
res = nfs_open(inode, filp);
return res;
}
int
nfs_file_release(struct inode *inode, struct file *filp)
{
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
}
EXPORT_SYMBOL_GPL(nfs_file_release);
/**
* nfs_revalidate_size - Revalidate the file size
* @inode - pointer to inode struct
* @file - pointer to struct file
*
* Revalidates the file length. This is basically a wrapper around
* nfs_revalidate_inode() that takes into account the fact that we may
* have cached writes (in which case we don't care about the server's
* idea of what the file length is), or O_DIRECT (in which case we
* shouldn't trust the cache).
*/
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
goto out_noreval;
if (filp->f_flags & O_DIRECT)
goto force_reval;
if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
goto force_reval;
if (nfs_attribute_timeout(inode))
goto force_reval;
out_noreval:
return 0;
force_reval:
return __nfs_revalidate_inode(server, inode);
}
loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
filp, offset, whence);
/*
* whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
* the cached file length
*/
if (whence != SEEK_SET && whence != SEEK_CUR) {
struct inode *inode = filp->f_mapping->host;
int retval = nfs_revalidate_file_size(inode, filp);
if (retval < 0)
return (loff_t)retval;
}
return generic_file_llseek(filp, offset, whence);
}
EXPORT_SYMBOL_GPL(nfs_file_llseek);
/*
* Flush all dirty pages, and check for write errors.
*/
int
nfs_file_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file_inode(file);
dprintk("NFS: flush(%pD2)\n", file);
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
/*
* If we're holding a write delegation, then just start the i/o
* but don't wait for completion (or send a commit).
*/
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return filemap_fdatawrite(file->f_mapping);
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
}
EXPORT_SYMBOL_GPL(nfs_file_flush);
ssize_t
nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, to, iocb->ki_pos);
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
if (result > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
}
return result;
}
EXPORT_SYMBOL_GPL(nfs_file_read);
ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
struct inode *inode = file_inode(filp);
ssize_t res;
dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
filp, (unsigned long) count, (unsigned long long) *ppos);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (!res) {
res = generic_file_splice_read(filp, ppos, pipe, count, flags);
if (res > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
}
return res;
}
EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
struct inode *inode = file_inode(file);
int status;
dprintk("NFS: mmap(%pD2)\n", file);
/* Note: generic_file_mmap() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
*/
status = generic_file_mmap(file, vma);
if (!status) {
vma->vm_ops = &nfs_file_vm_ops;
status = nfs_revalidate_mapping(inode, file->f_mapping);
}
return status;
}
EXPORT_SYMBOL_GPL(nfs_file_mmap);
/*
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*
* Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
* disk, but it retrieves and clears ctx->error after synching, despite
* the two being set at the same time in nfs_context_set_write_error().
* This is because the former is used to notify the _next_ call to
* nfs_file_write() that a write error occurred, and hence cause it to
* fall back to doing a synchronous write.
*/
int
nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
int have_error, do_resend, status;
int ret = 0;
dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
if (have_error) {
ret = xchg(&ctx->error, 0);
if (ret)
goto out;
}
if (status < 0) {
ret = status;
goto out;
}
do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
if (do_resend)
ret = -EAGAIN;
out:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
static int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct inode *inode = file_inode(file);
trace_nfs_fsync_enter(inode);
do {
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
mutex_unlock(&inode->i_mutex);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
* the NFS_CONTEXT_RESEND_WRITES flag
*/
start = 0;
end = LLONG_MAX;
} while (ret == -EAGAIN);
trace_nfs_fsync_exit(inode, ret);
return ret;
}
/*
* Decide whether a read/modify/write cycle may be more efficient
* then a modify/write/read cycle when writing to a page in the
* page cache.
*
* The modify/write/read cycle may occur if a page is read before
* being completely filled by the writer. In this situation, the
* page must be completely written to stable storage on the server
* before it can be refilled by reading in the page from the server.
* This can lead to expensive, small, FILE_SYNC mode writes being
* done.
*
* It may be more efficient to read the page first if the file is
* open for reading in addition to writing, the page is not marked
* as Uptodate, it is not dirty or waiting to be committed,
* indicating that it was previously allocated and then modified,
* that there were valid bytes of data in that range of the file,
* and that the new data won't completely replace the old data in
* that range of the file.
*/
static int nfs_want_read_modify_write(struct file *file, struct page *page,
loff_t pos, unsigned len)
{
unsigned int pglen = nfs_page_length(page);
unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
unsigned int end = offset + len;
if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
if (!PageUptodate(page))
return 1;
return 0;
}
if ((file->f_mode & FMODE_READ) && /* open for read? */
!PageUptodate(page) && /* Uptodate? */
!PagePrivate(page) && /* i/o request already? */
pglen && /* valid bytes of file? */
(end < pglen || offset)) /* replace all valid bytes? */
return 1;
return 0;
}
/*
* This does the "real" work of the write. We must allocate and lock the
* page to be sent back to the generic routine, which then copies the
* data from user space.
*
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
int ret;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
int once_thru = 0;
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
start:
/*
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
*/
ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
ret = nfs_flush_incompatible(file, page);
if (ret) {
unlock_page(page);
page_cache_release(page);
} else if (!once_thru &&
nfs_want_read_modify_write(file, page, pos, len)) {
once_thru = 1;
ret = nfs_readpage(file, page);
page_cache_release(page);
if (!ret)
goto start;
}
return ret;
}
static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
/*
* Zero any uninitialised parts of the page, and then mark the page
* as up to date if it turns out that we're extending the file.
*/
if (!PageUptodate(page)) {
unsigned pglen = nfs_page_length(page);
unsigned end = offset + len;
if (pglen == 0) {
zero_user_segments(page, 0, offset,
end, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else if (end >= pglen) {
zero_user_segment(page, end, PAGE_CACHE_SIZE);
if (offset == 0)
SetPageUptodate(page);
} else
zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
}
status = nfs_updatepage(file, page, offset, copied);
unlock_page(page);
page_cache_release(page);
if (status < 0)
return status;
NFS_I(mapping->host)->write_io += copied;
if (nfs_ctx_key_to_expire(ctx)) {
status = nfs_wb_all(mapping->host);
if (status < 0)
return status;
}
return copied;
}
/*
* Partially or wholly invalidate a page
* - Release the private state associated with a page if undergoing complete
* page invalidation
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
*/
static void nfs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
page, offset, length);
if (offset != 0 || length < PAGE_CACHE_SIZE)
return;
/* Cancel any unstarted writes on this page */
nfs_wb_page_cancel(page_file_mapping(page)->host, page);
nfs_fscache_invalidate_page(page, page->mapping->host);
}
/*
* Attempt to release the private state associated with a page
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
* - Return true (may release page) or false (may not)
*/
static int nfs_release_page(struct page *page, gfp_t gfp)
{
struct address_space *mapping = page->mapping;
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* Always try to initiate a 'commit' if relevant, but only
* wait for it if __GFP_WAIT is set. Even then, only wait 1
* second and only if the 'bdi' is not congested.
* Waiting indefinitely can cause deadlocks when the NFS
* server is on this machine, when a new TCP connection is
* needed and in other rare cases. There is no particular
* need to wait extensively here. A short wait has the
* benefit that someone else can worry about the freezer.
*/
if (mapping) {
struct nfs_server *nfss = NFS_SERVER(mapping->host);
nfs_commit_inode(mapping->host, 0);
if ((gfp & __GFP_WAIT) &&
!bdi_write_congested(&nfss->backing_dev_info)) {
wait_on_page_bit_killable_timeout(page, PG_private,
HZ);
if (PagePrivate(page))
set_bdi_congested(&nfss->backing_dev_info,
BLK_RW_ASYNC);
}
}
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
return nfs_fscache_release_page(page, gfp);
}
static void nfs_check_dirty_writeback(struct page *page,
bool *dirty, bool *writeback)
{
struct nfs_inode *nfsi;
struct address_space *mapping = page_file_mapping(page);
if (!mapping || PageSwapCache(page))
return;
/*
* Check if an unstable page is currently being committed and
* if so, have the VM treat it as if the page is under writeback
* so it will not block due to pages that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
*writeback = true;
return;
}
/*
* If PagePrivate() is set, then the page is not freeable and as the
* inode is not being committed, it's not going to be cleaned in the
* near future so treat it as dirty
*/
if (PagePrivate(page))
*dirty = true;
}
/*
* Attempt to clear the private state associated with a page when an error
* occurs that requires the cached contents of an inode to be written back or
* destroyed
* - Called if either PG_private or fscache is set on the page
* - Caller holds page lock
* - Return 0 if successful, -error otherwise
*/
static int nfs_launder_page(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_inode *nfsi = NFS_I(inode);
dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
inode->i_ino, (long long)page_offset(page));
nfs_fscache_wait_on_page_write(nfsi, page);
return nfs_wb_page(inode, page);
}
#ifdef CONFIG_NFS_SWAP
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
int ret;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
*span = sis->pages;
rcu_read_lock();
ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
rcu_read_unlock();
return ret;
}
static void nfs_swap_deactivate(struct file *file)
{
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
rcu_read_lock();
xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
rcu_read_unlock();
}
#endif
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.write_begin = nfs_write_begin,
.write_end = nfs_write_end,
.invalidatepage = nfs_invalidate_page,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
#endif
};
/*
* Notification that a PTE pointing to an NFS page is about to be made
* writable, implying that someone is about to modify the page through a
* shared-writable mapping
*/
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *filp = vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
int ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
(long long)page_offset(page));
/* make sure the cache has finished storing the page */
nfs_fscache_wait_on_page_write(NFS_I(inode), page);
lock_page(page);
mapping = page_file_mapping(page);
if (mapping != inode->i_mapping)
goto out_unlock;
wait_on_page_writeback(page);
pagelen = nfs_page_length(page);
if (pagelen == 0)
goto out_unlock;
ret = VM_FAULT_LOCKED;
if (nfs_flush_incompatible(filp, page) == 0 &&
nfs_updatepage(filp, page, 0, pagelen) == 0)
goto out;
ret = VM_FAULT_SIGBUS;
out_unlock:
unlock_page(page);
out:
return ret;
}
static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
static int nfs_need_sync_write(struct file *filp, struct inode *inode)
{
struct nfs_open_context *ctx;
if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
return 1;
ctx = nfs_file_open_context(filp);
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
nfs_ctx_key_to_expire(ctx))
return 1;
return 0;
}
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
unsigned long written = 0;
ssize_t result;
size_t count = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
if (file->f_flags & O_DIRECT)
return nfs_file_direct_write(iocb, from, pos);
dprintk("NFS: write(%pD2, %zu@%Ld)\n",
file, count, (long long) pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
goto out_swapfile;
/*
* O_APPEND implies that we must revalidate the file length.
*/
if (file->f_flags & O_APPEND) {
result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
}
result = count;
if (!count)
goto out;
result = generic_file_write_iter(iocb, from);
if (result > 0)
written = result;
/* Return error values for O_DSYNC and IS_SYNC() */
if (result >= 0 && nfs_need_sync_write(file, inode)) {
int err = vfs_fsync(file, 0);
if (err < 0)
result = err;
}
if (result > 0)
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
out:
return result;
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
goto out;
}
EXPORT_SYMBOL_GPL(nfs_file_write);
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
unsigned int saved_type = fl->fl_type;
/* Try local locking first */
posix_test_lock(filp, fl);
if (fl->fl_type != F_UNLCK) {
/* found a conflict */
goto out;
}
fl->fl_type = saved_type;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
goto out_noconflict;
if (is_local)
goto out_noconflict;
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
return status;
out_noconflict:
fl->fl_type = F_UNLCK;
goto out;
}
static int do_vfs_lock(struct file *file, struct file_lock *fl)
{
int res = 0;
switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
case FL_POSIX:
res = posix_lock_file_wait(file, fl);
break;
case FL_FLOCK:
res = flock_lock_file_wait(file, fl);
break;
default:
BUG();
}
return res;
}
static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
struct nfs_lock_context *l_ctx;
int status;
/*
* Flush all pending writes before doing anything
* with locks..
*/
nfs_sync_mapping(filp->f_mapping);
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {
status = nfs_iocounter_wait(&l_ctx->io_count);
nfs_put_lock_context(l_ctx);
if (status < 0)
return status;
}
/* NOTE: special case
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
/*
* Use local locking if mounted with "-onolock" or with appropriate
* "-olocal_lock="
*/
if (!is_local)
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = do_vfs_lock(filp, fl);
return status;
}
static int
is_time_granular(struct timespec *ts) {
return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
}
static int
do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status;
/*
* Flush all pending writes before doing anything
* with locks..
*/
status = nfs_sync_mapping(filp->f_mapping);
if (status != 0)
goto out;
/*
* Use local locking if mounted with "-onolock" or with appropriate
* "-olocal_lock="
*/
if (!is_local)
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = do_vfs_lock(filp, fl);
if (status < 0)
goto out;
/*
* Revalidate the cache if the server has time stamps granular
* enough to detect subsecond changes. Otherwise, clear the
* cache to prevent missing any changes.
*
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
if (is_time_granular(&NFS_SERVER(inode)->time_delta))
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
else
nfs_zap_caches(inode);
}
out:
return status;
}
/*
* Lock a (portion of) a file
*/
int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int ret = -ENOLCK;
int is_local = 0;
dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
filp, fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
/* No mandatory locks over NFS */
if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
goto out_err;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
ret = NFS_PROTO(inode)->lock_check_bounds(fl);
if (ret < 0)
goto out_err;
}
if (IS_GETLK(cmd))
ret = do_getlk(filp, cmd, fl, is_local);
else if (fl->fl_type == F_UNLCK)
ret = do_unlk(filp, cmd, fl, is_local);
else
ret = do_setlk(filp, cmd, fl, is_local);
out_err:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_lock);
/*
* Lock a (portion of) a file
*/
int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int is_local = 0;
dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
filp, fl->fl_type, fl->fl_flags);
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
/*
* The NFSv4 protocol doesn't support LOCK_MAND, which is not part of
* any standard. In principle we might be able to support LOCK_MAND
* on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the
* NFS code is not set up for it.
*/
if (fl->fl_type & LOCK_MAND)
return -EINVAL;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read = new_sync_read,
.write = new_sync_write,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);

View file

@ -0,0 +1,5 @@
#
# Makefile for the pNFS Files Layout Driver kernel module
#
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,157 @@
/*
* NFSv4 file layout driver data structures.
*
* Copyright (c) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand <dhildebz@umich.edu>
*
* Permission is granted to use, copy, create derivative works, and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* This software is provided as is, without representation or warranty
* of any kind either express or implied, including without limitation
* the implied warranties of merchantability, fitness for a particular
* purpose, or noninfringement. The Regents of the University of
* Michigan shall not be liable for any damages, including special,
* indirect, incidental, or consequential damages, with respect to any
* claim arising out of or in connection with the use of the software,
* even if it has been or is hereafter advised of the possibility of
* such damages.
*/
#ifndef FS_NFS_NFS4FILELAYOUT_H
#define FS_NFS_NFS4FILELAYOUT_H
#include "../pnfs.h"
/*
* Default data server connection timeout and retrans vaules.
* Set by module paramters dataserver_timeo and dataserver_retrans.
*/
#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */
#define NFS4_DEF_DS_RETRANS 5
/*
* Field testing shows we need to support up to 4096 stripe indices.
* We store each index as a u8 (u32 on the wire) to keep the memory footprint
* reasonable. This in turn means we support a maximum of 256
* RFC 5661 multipath_list4 structures.
*/
#define NFS4_PNFS_MAX_STRIPE_CNT 4096
#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
/* error codes for internal use */
#define NFS4ERR_RESET_TO_MDS 12001
enum stripetype4 {
STRIPE_SPARSE = 1,
STRIPE_DENSE = 2
};
/* Individual ip address */
struct nfs4_pnfs_ds_addr {
struct sockaddr_storage da_addr;
size_t da_addrlen;
struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *da_remotestr; /* human readable addr+port */
};
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
unsigned long ds_state;
#define NFS4DS_CONNECTING 0 /* ds is establishing connection */
};
struct nfs4_file_layout_dsaddr {
struct nfs4_deviceid_node id_node;
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
struct nfs4_pnfs_ds *ds_list[1];
};
struct nfs4_filelayout_segment {
struct pnfs_layout_segment generic_hdr;
u32 stripe_type;
u32 commit_through_mds;
u32 stripe_unit;
u32 first_stripe_index;
u64 pattern_offset;
struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
unsigned int num_fh;
struct nfs_fh **fh_array;
};
struct nfs4_filelayout {
struct pnfs_layout_hdr generic_hdr;
struct pnfs_ds_commit_info commit_info;
};
static inline struct nfs4_filelayout *
FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
{
return container_of(lo, struct nfs4_filelayout, generic_hdr);
}
static inline struct nfs4_filelayout_segment *
FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
return container_of(lseg,
struct nfs4_filelayout_segment,
generic_hdr);
}
static inline struct nfs4_deviceid_node *
FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
{
return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
}
static inline void
filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
{
u32 *p = (u32 *)&node->deviceid;
printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
p[0], p[1], p[2], p[3]);
set_bit(NFS_DEVICEID_INVALID, &node->flags);
}
static inline bool
filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
{
return test_bit(NFS_DEVICEID_INVALID, &node->flags);
}
extern bool
filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
extern void print_ds(struct nfs4_pnfs_ds *ds);
u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
u32 ds_idx);
extern struct nfs4_file_layout_dsaddr *
nfs4_fl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_flags);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
#endif /* FS_NFS_NFS4FILELAYOUT_H */

View file

@ -0,0 +1,745 @@
/*
* Device operations for the pnfs nfs4 file layout driver.
*
* Copyright (c) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand <dhildebz@umich.edu>
* Garth Goodson <Garth.Goodson@netapp.com>
*
* Permission is granted to use, copy, create derivative works, and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* This software is provided as is, without representation or warranty
* of any kind either express or implied, including without limitation
* the implied warranties of merchantability, fitness for a particular
* purpose, or noninfringement. The Regents of the University of
* Michigan shall not be liable for any damages, including special,
* indirect, incidental, or consequential damages, with respect to any
* claim arising out of or in connection with the use of the software,
* even if it has been or is hereafter advised of the possibility of
* such damages.
*/
#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/sunrpc/addr.h>
#include "../internal.h"
#include "../nfs4session.h"
#include "filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
/*
* Data server cache
*
* Data servers can be mapped to different device ids.
* nfs4_pnfs_ds reference counting
* - set to 1 on allocation
* - incremented when a device id maps a data server already in the cache.
* - decremented when deviceid is removed from the cache.
*/
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
/* Debug routines */
void
print_ds(struct nfs4_pnfs_ds *ds)
{
if (ds == NULL) {
printk("%s NULL device\n", __func__);
return;
}
printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
static bool
same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
struct sockaddr_in *a, *b;
struct sockaddr_in6 *a6, *b6;
if (addr1->sa_family != addr2->sa_family)
return false;
switch (addr1->sa_family) {
case AF_INET:
a = (struct sockaddr_in *)addr1;
b = (struct sockaddr_in *)addr2;
if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
a->sin_port == b->sin_port)
return true;
break;
case AF_INET6:
a6 = (struct sockaddr_in6 *)addr1;
b6 = (struct sockaddr_in6 *)addr2;
/* LINKLOCAL addresses must have matching scope_id */
if (ipv6_addr_src_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
return false;
if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
a6->sin6_port == b6->sin6_port)
return true;
break;
default:
dprintk("%s: unhandled address family: %u\n",
__func__, addr1->sa_family);
return false;
}
return false;
}
static bool
_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
const struct list_head *dsaddrs2)
{
struct nfs4_pnfs_ds_addr *da1, *da2;
/* step through both lists, comparing as we go */
for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
da1 != NULL && da2 != NULL;
da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
(struct sockaddr *)&da2->da_addr))
return false;
}
if (da1 == NULL && da2 == NULL)
return true;
return false;
}
/*
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
_data_server_lookup_locked(const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
}
/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
struct nfs_client *clp = ERR_PTR(-EIO);
struct nfs4_pnfs_ds_addr *da;
int status = 0;
dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
list_for_each_entry(da, &ds->ds_addrs, da_node) {
dprintk("%s: DS %s: trying address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
clp = nfs4_set_ds_client(mds_srv->nfs_client,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP,
dataserver_timeo, dataserver_retrans);
if (!IS_ERR(clp))
break;
}
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
}
status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
if (status)
goto out_put;
smp_wmb();
ds->ds_clp = clp;
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
nfs_put_client(clp);
goto out;
}
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
struct nfs4_pnfs_ds_addr *da;
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);
if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
while (!list_empty(&ds->ds_addrs)) {
da = list_first_entry(&ds->ds_addrs,
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
kfree(ds->ds_remotestr);
kfree(ds);
}
void
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
struct nfs4_pnfs_ds *ds;
int i;
nfs4_print_deviceid(&dsaddr->id_node.deviceid);
for (i = 0; i < dsaddr->ds_num; i++) {
ds = dsaddr->ds_list[i];
if (ds != NULL) {
if (atomic_dec_and_lock(&ds->ds_count,
&nfs4_ds_cache_lock)) {
list_del_init(&ds->ds_node);
spin_unlock(&nfs4_ds_cache_lock);
destroy_ds(ds);
}
}
}
kfree(dsaddr->stripe_indices);
kfree(dsaddr);
}
/*
* Create a string with a human readable address and port to avoid
* complicated setup around many dprinks.
*/
static char *
nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds_addr *da;
char *remotestr;
size_t len;
char *p;
len = 3; /* '{', '}' and eol */
list_for_each_entry(da, dsaddrs, da_node) {
len += strlen(da->da_remotestr) + 1; /* string plus comma */
}
remotestr = kzalloc(len, gfp_flags);
if (!remotestr)
return NULL;
p = remotestr;
*(p++) = '{';
len--;
list_for_each_entry(da, dsaddrs, da_node) {
size_t ll = strlen(da->da_remotestr);
if (ll > len)
goto out_err;
memcpy(p, da->da_remotestr, ll);
p += ll;
len -= ll;
if (len < 1)
goto out_err;
(*p++) = ',';
len--;
}
if (len < 2)
goto out_err;
*(p++) = '}';
*p = '\0';
return remotestr;
out_err:
kfree(remotestr);
return NULL;
}
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
if (list_empty(dsaddrs)) {
dprintk("%s: no addresses defined\n", __func__);
goto out;
}
ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;
/* this is only used for debugging, so it's ok if its NULL */
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
spin_lock(&nfs4_ds_cache_lock);
tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
dprintk("%s data server %s found, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
spin_unlock(&nfs4_ds_cache_lock);
out:
return ds;
}
/*
* Currently only supports ipv4, ipv6 and one multi-path address.
*/
static struct nfs4_pnfs_ds_addr *
decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
__be16 port;
int nlen, rlen;
int tmp[2];
__be32 *p;
char *netid, *match_netid;
size_t len, match_netid_len;
char *startsep = "";
char *endsep = "";
/* r_netid */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
goto out_err;
nlen = be32_to_cpup(p++);
p = xdr_inline_decode(streamp, nlen);
if (unlikely(!p))
goto out_err;
netid = kmalloc(nlen+1, gfp_flags);
if (unlikely(!netid))
goto out_err;
netid[nlen] = '\0';
memcpy(netid, p, nlen);
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
goto out_free_netid;
/* port is ".ABC.DEF", 8 chars max */
if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
/* replace port '.' with '-' */
portstr = strrchr(buf, '.');
if (!portstr) {
dprintk("%s: Failed finding expected dot in port\n",
__func__);
goto out_free_buf;
}
*portstr = '-';
/* find '.' between address and port */
portstr = strrchr(buf, '.');
if (!portstr) {
dprintk("%s: Failed finding expected dot between address and "
"port\n", __func__);
goto out_free_buf;
}
*portstr = '\0';
da = kzalloc(sizeof(*da), gfp_flags);
if (unlikely(!da))
goto out_free_buf;
INIT_LIST_HEAD(&da->da_node);
if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
sizeof(da->da_addr))) {
dprintk("%s: error parsing address %s\n", __func__, buf);
goto out_free_da;
}
portstr++;
sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));
switch (da->da_addr.ss_family) {
case AF_INET:
((struct sockaddr_in *)&da->da_addr)->sin_port = port;
da->da_addrlen = sizeof(struct sockaddr_in);
match_netid = "tcp";
match_netid_len = 3;
break;
case AF_INET6:
((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
da->da_addrlen = sizeof(struct sockaddr_in6);
match_netid = "tcp6";
match_netid_len = 4;
startsep = "[";
endsep = "]";
break;
default:
dprintk("%s: unsupported address family: %u\n",
__func__, da->da_addr.ss_family);
goto out_free_da;
}
if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
__func__, netid, match_netid);
goto out_free_da;
}
/* save human readable address */
len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
da->da_remotestr = kzalloc(len, gfp_flags);
/* NULL is ok, only used for dprintk */
if (da->da_remotestr)
snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
buf, endsep, ntohs(port));
dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
kfree(netid);
return da;
out_free_da:
kfree(da);
out_free_buf:
dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
kfree(buf);
out_free_netid:
kfree(netid);
out_err:
return NULL;
}
/* Decode opaque device data and return the result */
struct nfs4_file_layout_dsaddr *
nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
gfp_t gfp_flags)
{
int i;
u32 cnt, num;
u8 *indexp;
__be32 *p;
u8 *stripe_indices;
u8 max_stripe_index;
struct nfs4_file_layout_dsaddr *dsaddr = NULL;
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
if (!scratch)
goto out_err;
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
/* Get the stripe count (number of stripe index) */
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
goto out_err_free_scratch;
cnt = be32_to_cpup(p);
dprintk("%s stripe count %d\n", __func__, cnt);
if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
"supported maximum %d\n", __func__,
cnt, NFS4_PNFS_MAX_STRIPE_CNT);
goto out_err_free_scratch;
}
/* read stripe indices */
stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
if (!stripe_indices)
goto out_err_free_scratch;
p = xdr_inline_decode(&stream, cnt << 2);
if (unlikely(!p))
goto out_err_free_stripe_indices;
indexp = &stripe_indices[0];
max_stripe_index = 0;
for (i = 0; i < cnt; i++) {
*indexp = be32_to_cpup(p++);
max_stripe_index = max(max_stripe_index, *indexp);
indexp++;
}
/* Check the multipath list count */
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
goto out_err_free_stripe_indices;
num = be32_to_cpup(p);
dprintk("%s ds_num %u\n", __func__, num);
if (num > NFS4_PNFS_MAX_MULTI_CNT) {
printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
"supported maximum %d\n", __func__,
num, NFS4_PNFS_MAX_MULTI_CNT);
goto out_err_free_stripe_indices;
}
/* validate stripe indices are all < num */
if (max_stripe_index >= num) {
printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
__func__, max_stripe_index, num);
goto out_err_free_stripe_indices;
}
dsaddr = kzalloc(sizeof(*dsaddr) +
(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
gfp_flags);
if (!dsaddr)
goto out_err_free_stripe_indices;
dsaddr->stripe_count = cnt;
dsaddr->stripe_indices = stripe_indices;
stripe_indices = NULL;
dsaddr->ds_num = num;
nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
INIT_LIST_HEAD(&dsaddrs);
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
u32 mp_count;
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
goto out_err_free_deviceid;
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
da = decode_ds_addr(server->nfs_client->cl_net,
&stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
if (list_empty(&dsaddrs)) {
dprintk("%s: no suitable DS addresses found\n",
__func__);
goto out_err_free_deviceid;
}
dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
/* If DS was already in cache, free ds addrs */
while (!list_empty(&dsaddrs)) {
da = list_first_entry(&dsaddrs,
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
}
__free_page(scratch);
return dsaddr;
out_err_drain_dsaddrs:
while (!list_empty(&dsaddrs)) {
da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
out_err_free_deviceid:
nfs4_fl_free_deviceid(dsaddr);
/* stripe_indicies was part of dsaddr */
goto out_err_free_scratch;
out_err_free_stripe_indices:
kfree(stripe_indices);
out_err_free_scratch:
__free_page(scratch);
out_err:
dprintk("%s ERROR: returning NULL\n", __func__);
return NULL;
}
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
nfs4_put_deviceid_node(&dsaddr->id_node);
}
/*
* Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
* Then: ((res + fsi) % dsaddr->stripe_count)
*/
u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
u64 tmp;
tmp = offset - flseg->pattern_offset;
do_div(tmp, flseg->stripe_unit);
tmp += flseg->first_stripe_index;
return do_div(tmp, flseg->dsaddr->stripe_count);
}
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
}
struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
u32 i;
if (flseg->stripe_type == STRIPE_SPARSE) {
if (flseg->num_fh == 1)
i = 0;
else if (flseg->num_fh == 0)
/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
return NULL;
else
i = nfs4_fl_calc_ds_index(lseg, j);
} else
i = j;
return flseg->fh_array[i];
}
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
nfs_wait_bit_killable, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
smp_mb__before_atomic();
clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
smp_mb__after_atomic();
wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs4_pnfs_ds *ret = ds;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
filelayout_mark_devid_invalid(devid);
goto out;
}
smp_rmb();
if (ds->ds_clp)
goto out_test_devid;
if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
err = nfs4_ds_connect(s, ds);
if (err)
nfs4_mark_deviceid_unavailable(devid);
nfs4_clear_ds_conn_bit(ds);
} else {
/* Either ds is connected, or ds is NULL */
nfs4_wait_ds_connect(ds);
}
out_test_devid:
if (filelayout_test_devid_unavailable(devid))
ret = NULL;
out:
return ret;
}
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
"retries a request before it attempts further "
" recovery action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
"NFSv4.1 client waits for a response from a "
" data server before it retries an NFS request.");

336
fs/nfs/fscache-index.c Normal file
View file

@ -0,0 +1,336 @@
/* NFS FS-Cache index structure definition
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/in6.h>
#include "internal.h"
#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_FSCACHE
/*
* Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks
* the cookie for the top-level index object for NFS into here. The top-level
* index can than have other cache objects inserted into it.
*/
struct fscache_netfs nfs_fscache_netfs = {
.name = "nfs",
.version = 0,
};
/*
* Register NFS for caching
*/
int nfs_fscache_register(void)
{
return fscache_register_netfs(&nfs_fscache_netfs);
}
/*
* Unregister NFS for caching
*/
void nfs_fscache_unregister(void)
{
fscache_unregister_netfs(&nfs_fscache_netfs);
}
/*
* Layout of the key for an NFS server cache object.
*/
struct nfs_server_key {
uint16_t nfsversion; /* NFS protocol version */
uint16_t family; /* address family */
uint16_t port; /* IP port */
union {
struct in_addr ipv4_addr; /* IPv4 address */
struct in6_addr ipv6_addr; /* IPv6 address */
} addr[0];
};
/*
* Generate a key to describe a server in the main NFS index
* - We return the length of the key, or 0 if we can't generate one
*/
static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct nfs_client *clp = cookie_netfs_data;
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
struct nfs_server_key *key = buffer;
uint16_t len = sizeof(struct nfs_server_key);
memset(key, 0, len);
key->nfsversion = clp->rpc_ops->version;
key->family = clp->cl_addr.ss_family;
switch (clp->cl_addr.ss_family) {
case AF_INET:
key->port = sin->sin_port;
key->addr[0].ipv4_addr = sin->sin_addr;
len += sizeof(key->addr[0].ipv4_addr);
break;
case AF_INET6:
key->port = sin6->sin6_port;
key->addr[0].ipv6_addr = sin6->sin6_addr;
len += sizeof(key->addr[0].ipv6_addr);
break;
default:
printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
clp->cl_addr.ss_family);
len = 0;
break;
}
return len;
}
/*
* Define the server object for FS-Cache. This is used to describe a server
* object to fscache_acquire_cookie(). It is keyed by the NFS protocol and
* server address parameters.
*/
const struct fscache_cookie_def nfs_fscache_server_index_def = {
.name = "NFS.server",
.type = FSCACHE_COOKIE_TYPE_INDEX,
.get_key = nfs_server_get_key,
};
/*
* Generate a key to describe a superblock key in the main NFS index
*/
static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct nfs_fscache_key *key;
const struct nfs_server *nfss = cookie_netfs_data;
uint16_t len;
key = nfss->fscache_key;
len = sizeof(key->key) + key->key.uniq_len;
if (len > bufmax) {
len = 0;
} else {
memcpy(buffer, &key->key, sizeof(key->key));
memcpy(buffer + sizeof(key->key),
key->key.uniquifier, key->key.uniq_len);
}
return len;
}
/*
* Define the superblock object for FS-Cache. This is used to describe a
* superblock object to fscache_acquire_cookie(). It is keyed by all the NFS
* parameters that might cause a separate superblock.
*/
const struct fscache_cookie_def nfs_fscache_super_index_def = {
.name = "NFS.super",
.type = FSCACHE_COOKIE_TYPE_INDEX,
.get_key = nfs_super_get_key,
};
/*
* Definition of the auxiliary data attached to NFS inode storage objects
* within the cache.
*
* The contents of this struct are recorded in the on-disk local cache in the
* auxiliary data attached to the data storage object backing an inode. This
* permits coherency to be managed when a new inode binds to an already extant
* cache object.
*/
struct nfs_fscache_inode_auxdata {
struct timespec mtime;
struct timespec ctime;
loff_t size;
u64 change_attr;
};
/*
* Generate a key to describe an NFS inode in an NFS server's index
*/
static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct nfs_inode *nfsi = cookie_netfs_data;
uint16_t nsize;
/* use the inode's NFS filehandle as the key */
nsize = nfsi->fh.size;
memcpy(buffer, nfsi->fh.data, nsize);
return nsize;
}
/*
* Get certain file attributes from the netfs data
* - This function can be absent for an index
* - Not permitted to return an error
* - The netfs data from the cookie being used as the source is presented
*/
static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
const struct nfs_inode *nfsi = cookie_netfs_data;
*size = nfsi->vfs_inode.i_size;
}
/*
* Get the auxiliary data from netfs data
* - This function can be absent if the index carries no state data
* - Should store the auxiliary data in the buffer
* - Should return the amount of amount stored
* - Not permitted to return an error
* - The netfs data from the cookie being used as the source is presented
*/
static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
struct nfs_fscache_inode_auxdata auxdata;
const struct nfs_inode *nfsi = cookie_netfs_data;
memset(&auxdata, 0, sizeof(auxdata));
auxdata.size = nfsi->vfs_inode.i_size;
auxdata.mtime = nfsi->vfs_inode.i_mtime;
auxdata.ctime = nfsi->vfs_inode.i_ctime;
if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
auxdata.change_attr = nfsi->vfs_inode.i_version;
if (bufmax > sizeof(auxdata))
bufmax = sizeof(auxdata);
memcpy(buffer, &auxdata, bufmax);
return bufmax;
}
/*
* Consult the netfs about the state of an object
* - This function can be absent if the index carries no state data
* - The netfs data from the cookie being used as the target is
* presented, as is the auxiliary data
*/
static
enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
const void *data,
uint16_t datalen)
{
struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = cookie_netfs_data;
if (datalen != sizeof(auxdata))
return FSCACHE_CHECKAUX_OBSOLETE;
memset(&auxdata, 0, sizeof(auxdata));
auxdata.size = nfsi->vfs_inode.i_size;
auxdata.mtime = nfsi->vfs_inode.i_mtime;
auxdata.ctime = nfsi->vfs_inode.i_ctime;
if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
auxdata.change_attr = nfsi->vfs_inode.i_version;
if (memcmp(data, &auxdata, datalen) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
}
/*
* Indication from FS-Cache that the cookie is no longer cached
* - This function is called when the backing store currently caching a cookie
* is removed
* - The netfs should use this to clean up any markers indicating cached pages
* - This is mandatory for any object that may have data
*/
static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data)
{
struct nfs_inode *nfsi = cookie_netfs_data;
struct pagevec pvec;
pgoff_t first;
int loop, nr_pages;
pagevec_init(&pvec, 0);
first = 0;
dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi);
for (;;) {
/* grab a bunch of pages to unmark */
nr_pages = pagevec_lookup(&pvec,
nfsi->vfs_inode.i_mapping,
first,
PAGEVEC_SIZE - pagevec_count(&pvec));
if (!nr_pages)
break;
for (loop = 0; loop < nr_pages; loop++)
ClearPageFsCache(pvec.pages[loop]);
first = pvec.pages[nr_pages - 1]->index + 1;
pvec.nr = nr_pages;
pagevec_release(&pvec);
cond_resched();
}
}
/*
* Get an extra reference on a read context.
* - This function can be absent if the completion function doesn't require a
* context.
* - The read context is passed back to NFS in the event that a data read on the
* cache fails with EIO - in which case the server must be contacted to
* retrieve the data, which requires the read context for security.
*/
static void nfs_fh_get_context(void *cookie_netfs_data, void *context)
{
get_nfs_open_context(context);
}
/*
* Release an extra reference on a read context.
* - This function can be absent if the completion function doesn't require a
* context.
*/
static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
{
if (context)
put_nfs_open_context(context);
}
/*
* Define the inode object for FS-Cache. This is used to describe an inode
* object to fscache_acquire_cookie(). It is keyed by the NFS file handle for
* an inode.
*
* Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime
* held in the cache auxiliary data for the data storage object with those in
* the inode struct in memory.
*/
const struct fscache_cookie_def nfs_fscache_inode_object_def = {
.name = "NFS.fh",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
.get_key = nfs_fscache_inode_get_key,
.get_attr = nfs_fscache_inode_get_attr,
.get_aux = nfs_fscache_inode_get_aux,
.check_aux = nfs_fscache_inode_check_aux,
.now_uncached = nfs_fscache_inode_now_uncached,
.get_context = nfs_fh_get_context,
.put_context = nfs_fh_put_context,
};

439
fs/nfs/fscache.c Normal file
View file

@ -0,0 +1,439 @@
/* NFS filesystem cache interface
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/in6.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_FSCACHE
static struct rb_root nfs_fscache_keys = RB_ROOT;
static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
/*
* Get the per-client index cookie for an NFS client if the appropriate mount
* flag was set
* - We always try and get an index cookie for the client, but get filehandle
* cookies on a per-superblock basis, depending on the mount flags
*/
void nfs_fscache_get_client_cookie(struct nfs_client *clp)
{
/* create a cache index for looking up filehandles */
clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
&nfs_fscache_server_index_def,
clp, true);
dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
}
/*
* Dispose of a per-client cookie
*/
void nfs_fscache_release_client_cookie(struct nfs_client *clp)
{
dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
fscache_relinquish_cookie(clp->fscache, 0);
clp->fscache = NULL;
}
/*
* Get the cache cookie for an NFS superblock. We have to handle
* uniquification here because the cache doesn't do it for us.
*
* The default uniquifier is just an empty string, but it may be overridden
* either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
* superblock across an automount point of some nature.
*/
void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
int diff;
if (!uniq) {
uniq = "";
ulen = 1;
}
key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
if (!key)
return;
key->nfs_client = nfss->nfs_client;
key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
key->key.nfs_server.flags = nfss->flags;
key->key.nfs_server.rsize = nfss->rsize;
key->key.nfs_server.wsize = nfss->wsize;
key->key.nfs_server.acregmin = nfss->acregmin;
key->key.nfs_server.acregmax = nfss->acregmax;
key->key.nfs_server.acdirmin = nfss->acdirmin;
key->key.nfs_server.acdirmax = nfss->acdirmax;
key->key.nfs_server.fsid = nfss->fsid;
key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor;
key->key.uniq_len = ulen;
memcpy(key->key.uniquifier, uniq, ulen);
spin_lock(&nfs_fscache_keys_lock);
p = &nfs_fscache_keys.rb_node;
parent = NULL;
while (*p) {
parent = *p;
xkey = rb_entry(parent, struct nfs_fscache_key, node);
if (key->nfs_client < xkey->nfs_client)
goto go_left;
if (key->nfs_client > xkey->nfs_client)
goto go_right;
diff = memcmp(&key->key, &xkey->key, sizeof(key->key));
if (diff < 0)
goto go_left;
if (diff > 0)
goto go_right;
if (key->key.uniq_len == 0)
goto non_unique;
diff = memcmp(key->key.uniquifier,
xkey->key.uniquifier,
key->key.uniq_len);
if (diff < 0)
goto go_left;
if (diff > 0)
goto go_right;
goto non_unique;
go_left:
p = &(*p)->rb_left;
continue;
go_right:
p = &(*p)->rb_right;
}
rb_link_node(&key->node, parent, p);
rb_insert_color(&key->node, &nfs_fscache_keys);
spin_unlock(&nfs_fscache_keys_lock);
nfss->fscache_key = key;
/* create a cache index for looking up filehandles */
nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
&nfs_fscache_super_index_def,
nfss, true);
dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
return;
non_unique:
spin_unlock(&nfs_fscache_keys_lock);
kfree(key);
nfss->fscache_key = NULL;
nfss->fscache = NULL;
printk(KERN_WARNING "NFS:"
" Cache request denied due to non-unique superblock keys\n");
}
/*
* release a per-superblock cookie
*/
void nfs_fscache_release_super_cookie(struct super_block *sb)
{
struct nfs_server *nfss = NFS_SB(sb);
dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
fscache_relinquish_cookie(nfss->fscache, 0);
nfss->fscache = NULL;
if (nfss->fscache_key) {
spin_lock(&nfs_fscache_keys_lock);
rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys);
spin_unlock(&nfs_fscache_keys_lock);
kfree(nfss->fscache_key);
nfss->fscache_key = NULL;
}
}
/*
* Initialise the per-inode cache cookie pointer for an NFS inode.
*/
void nfs_fscache_init_inode(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
nfsi->fscache = NULL;
if (!S_ISREG(inode->i_mode))
return;
nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
&nfs_fscache_inode_object_def,
nfsi, false);
}
/*
* Release a per-inode cookie.
*/
void nfs_fscache_clear_inode(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
fscache_relinquish_cookie(cookie, false);
nfsi->fscache = NULL;
}
static bool nfs_fscache_can_enable(void *data)
{
struct inode *inode = data;
return !inode_is_open_for_write(inode);
}
/*
* Enable or disable caching for a file that is being opened as appropriate.
* The cookie is allocated when the inode is initialised, but is not enabled at
* that time. Enablement is deferred to file-open time to avoid stat() and
* access() thrashing the cache.
*
* For now, with NFS, only regular files that are open read-only will be able
* to use the cache.
*
* We enable the cache for an inode if we open it read-only and it isn't
* currently open for writing. We disable the cache if the inode is open
* write-only.
*
* The caller uses the file struct to pin i_writecount on the inode before
* calling us when a file is opened for writing, so we can make use of that.
*
* Note that this may be invoked multiple times in parallel by parallel
* nfs_open() functions.
*/
void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
if (!fscache_cookie_valid(cookie))
return;
if (inode_is_open_for_write(inode)) {
dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
fscache_disable_cookie(cookie, true);
fscache_uncache_all_inode_pages(cookie, inode);
} else {
dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
if (fscache_cookie_enabled(cookie))
set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
}
}
EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
/*
* Release the caching state associated with a page, if the page isn't busy
* interacting with the cache.
* - Returns true (can release page) or false (page busy).
*/
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
if (PageFsCache(page)) {
struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host);
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
cookie, page, NFS_I(page->mapping->host));
if (!fscache_maybe_release_page(cookie, page, gfp))
return 0;
nfs_add_fscache_stats(page->mapping->host,
NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
}
return 1;
}
/*
* Release the caching state associated with a page if undergoing complete page
* invalidation.
*/
void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
{
struct fscache_cookie *cookie = nfs_i_fscache(inode);
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
cookie, page, NFS_I(inode));
fscache_wait_on_page_write(cookie, page);
BUG_ON(!PageLocked(page));
fscache_uncache_page(cookie, page);
nfs_add_fscache_stats(page->mapping->host,
NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
}
/*
* Handle completion of a page being read from the cache.
* - Called in process (keventd) context.
*/
static void nfs_readpage_from_fscache_complete(struct page *page,
void *context,
int error)
{
dfprintk(FSCACHE,
"NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
page, context, error);
/* if the read completes with an error, we just unlock the page and let
* the VM reissue the readpage */
if (!error) {
SetPageUptodate(page);
unlock_page(page);
} else {
error = nfs_readpage_async(context, page->mapping->host, page);
if (error)
unlock_page(page);
}
}
/*
* Retrieve a page from fscache
*/
int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
struct inode *inode, struct page *page)
{
int ret;
dfprintk(FSCACHE,
"NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
nfs_i_fscache(inode), page, page->index, page->flags, inode);
ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
page,
nfs_readpage_from_fscache_complete,
ctx,
GFP_KERNEL);
switch (ret) {
case 0: /* read BIO submitted (page in fscache) */
dfprintk(FSCACHE,
"NFS: readpage_from_fscache: BIO submitted\n");
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
return ret;
case -ENOBUFS: /* inode not in cache */
case -ENODATA: /* page not in cache */
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
dfprintk(FSCACHE,
"NFS: readpage_from_fscache %d\n", ret);
return 1;
default:
dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
}
return ret;
}
/*
* Retrieve a set of pages from fscache
*/
int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages)
{
unsigned npages = *nr_pages;
int ret;
dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
nfs_i_fscache(inode), npages, inode);
ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
mapping, pages, nr_pages,
nfs_readpage_from_fscache_complete,
ctx,
mapping_gfp_mask(mapping));
if (*nr_pages < npages)
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK,
npages);
if (*nr_pages > 0)
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL,
*nr_pages);
switch (ret) {
case 0: /* read submitted to the cache for all pages */
BUG_ON(!list_empty(pages));
BUG_ON(*nr_pages != 0);
dfprintk(FSCACHE,
"NFS: nfs_getpages_from_fscache: submitted\n");
return ret;
case -ENOBUFS: /* some pages aren't cached and can't be */
case -ENODATA: /* some pages aren't cached */
dfprintk(FSCACHE,
"NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
return 1;
default:
dfprintk(FSCACHE,
"NFS: nfs_getpages_from_fscache: ret %d\n", ret);
}
return ret;
}
/*
* Store a newly fetched page in fscache
* - PG_fscache must be set on the page
*/
void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
{
int ret;
dfprintk(FSCACHE,
"NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
nfs_i_fscache(inode), page, page->index, page->flags, sync);
ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
dfprintk(FSCACHE,
"NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
page, page->index, page->flags, ret);
if (ret != 0) {
fscache_uncache_page(nfs_i_fscache(inode), page);
nfs_add_fscache_stats(inode,
NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
} else {
nfs_add_fscache_stats(inode,
NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
}
}

229
fs/nfs/fscache.h Normal file
View file

@ -0,0 +1,229 @@
/* NFS filesystem cache interface definitions
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#ifndef _NFS_FSCACHE_H
#define _NFS_FSCACHE_H
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
#include <linux/fscache.h>
#ifdef CONFIG_NFS_FSCACHE
/*
* set of NFS FS-Cache objects that form a superblock key
*/
struct nfs_fscache_key {
struct rb_node node;
struct nfs_client *nfs_client; /* the server */
/* the elements of the unique key - as used by nfs_compare_super() and
* nfs_compare_mount_options() to distinguish superblocks */
struct {
struct {
unsigned long s_flags; /* various flags
* (& NFS_MS_MASK) */
} super;
struct {
struct nfs_fsid fsid;
int flags;
unsigned int rsize; /* read size */
unsigned int wsize; /* write size */
unsigned int acregmin; /* attr cache timeouts */
unsigned int acregmax;
unsigned int acdirmin;
unsigned int acdirmax;
} nfs_server;
struct {
rpc_authflavor_t au_flavor;
} rpc_auth;
/* uniquifier - can be used if nfs_server.flags includes
* NFS_MOUNT_UNSHARED */
u8 uniq_len;
char uniquifier[0];
} key;
};
/*
* fscache-index.c
*/
extern struct fscache_netfs nfs_fscache_netfs;
extern const struct fscache_cookie_def nfs_fscache_server_index_def;
extern const struct fscache_cookie_def nfs_fscache_super_index_def;
extern const struct fscache_cookie_def nfs_fscache_inode_object_def;
extern int nfs_fscache_register(void);
extern void nfs_fscache_unregister(void);
/*
* fscache.c
*/
extern void nfs_fscache_get_client_cookie(struct nfs_client *);
extern void nfs_fscache_release_client_cookie(struct nfs_client *);
extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode(struct inode *);
extern void nfs_fscache_clear_inode(struct inode *);
extern void nfs_fscache_open_file(struct inode *, struct file *);
extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
extern int nfs_fscache_release_page(struct page *, gfp_t);
extern int __nfs_readpage_from_fscache(struct nfs_open_context *,
struct inode *, struct page *);
extern int __nfs_readpages_from_fscache(struct nfs_open_context *,
struct inode *, struct address_space *,
struct list_head *, unsigned *);
extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int);
/*
* wait for a page to complete writing to the cache
*/
static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
struct page *page)
{
if (PageFsCache(page))
fscache_wait_on_page_write(nfsi->fscache, page);
}
/*
* release the caching state associated with a page if undergoing complete page
* invalidation
*/
static inline void nfs_fscache_invalidate_page(struct page *page,
struct inode *inode)
{
if (PageFsCache(page))
__nfs_fscache_invalidate_page(page, inode);
}
/*
* Retrieve a page from an inode data storage object.
*/
static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
struct inode *inode,
struct page *page)
{
if (NFS_I(inode)->fscache)
return __nfs_readpage_from_fscache(ctx, inode, page);
return -ENOBUFS;
}
/*
* Retrieve a set of pages from an inode data storage object.
*/
static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages)
{
if (NFS_I(inode)->fscache)
return __nfs_readpages_from_fscache(ctx, inode, mapping, pages,
nr_pages);
return -ENOBUFS;
}
/*
* Store a page newly fetched from the server in an inode data storage object
* in the cache.
*/
static inline void nfs_readpage_to_fscache(struct inode *inode,
struct page *page,
int sync)
{
if (PageFsCache(page))
__nfs_readpage_to_fscache(inode, page, sync);
}
/*
* Invalidate the contents of fscache for this inode. This will not sleep.
*/
static inline void nfs_fscache_invalidate(struct inode *inode)
{
fscache_invalidate(NFS_I(inode)->fscache);
}
/*
* Wait for an object to finish being invalidated.
*/
static inline void nfs_fscache_wait_on_invalidate(struct inode *inode)
{
fscache_wait_on_invalidate(NFS_I(inode)->fscache);
}
/*
* indicate the client caching state as readable text
*/
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
return "yes";
return "no ";
}
#else /* CONFIG_NFS_FSCACHE */
static inline int nfs_fscache_register(void) { return 0; }
static inline void nfs_fscache_unregister(void) {}
static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
static inline void nfs_fscache_init_inode(struct inode *inode) {}
static inline void nfs_fscache_clear_inode(struct inode *inode) {}
static inline void nfs_fscache_open_file(struct inode *inode,
struct file *filp) {}
static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
return 1; /* True: may release page */
}
static inline void nfs_fscache_invalidate_page(struct page *page,
struct inode *inode) {}
static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
struct page *page) {}
static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
struct inode *inode,
struct page *page)
{
return -ENOBUFS;
}
static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages)
{
return -ENOBUFS;
}
static inline void nfs_readpage_to_fscache(struct inode *inode,
struct page *page, int sync) {}
static inline void nfs_fscache_invalidate(struct inode *inode) {}
static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {}
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
return "no ";
}
#endif /* CONFIG_NFS_FSCACHE */
#endif /* _NFS_FSCACHE_H */

133
fs/nfs/getroot.c Normal file
View file

@ -0,0 +1,133 @@
/* getroot.c: get the root dentry for an NFS mount
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/lockd/bind.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/vfs.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
/*
* Set the superblock root dentry.
* Note that this function frees the inode in case of error.
*/
static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *inode)
{
/* The mntroot acts as the dummy root dentry for this superblock */
if (sb->s_root == NULL) {
sb->s_root = d_make_root(inode);
if (sb->s_root == NULL)
return -ENOMEM;
ihold(inode);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
* d_materialise_unique if a parent directory from the same
* filesystem gets mounted at a later time.
* This again causes shrink_dcache_for_umount_subtree() to
* Oops, since the test for IS_ROOT() will fail.
*/
spin_lock(&sb->s_root->d_inode->i_lock);
spin_lock(&sb->s_root->d_lock);
hlist_del_init(&sb->s_root->d_u.d_alias);
spin_unlock(&sb->s_root->d_lock);
spin_unlock(&sb->s_root->d_inode->i_lock);
}
return 0;
}
/*
* get an NFS2/NFS3 root dentry from the root filehandle
*/
struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
const char *devname)
{
struct nfs_server *server = NFS_SB(sb);
struct nfs_fsinfo fsinfo;
struct dentry *ret;
struct inode *inode;
void *name = kstrdup(devname, GFP_KERNEL);
int error;
if (!name)
return ERR_PTR(-ENOMEM);
/* get the actual root for this mount */
fsinfo.fattr = nfs_alloc_fattr();
if (fsinfo.fattr == NULL) {
kfree(name);
return ERR_PTR(-ENOMEM);
}
error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
if (error < 0) {
dprintk("nfs_get_root: getattr error = %d\n", -error);
ret = ERR_PTR(error);
goto out;
}
inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL);
if (IS_ERR(inode)) {
dprintk("nfs_get_root: get root inode failed\n");
ret = ERR_CAST(inode);
goto out;
}
error = nfs_superblock_set_dummy_root(sb, inode);
if (error != 0) {
ret = ERR_PTR(error);
goto out;
}
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
*/
ret = d_obtain_root(inode);
if (IS_ERR(ret)) {
dprintk("nfs_get_root: get root dentry failed\n");
goto out;
}
security_d_instantiate(ret, inode);
spin_lock(&ret->d_lock);
if (IS_ROOT(ret) && !ret->d_fsdata &&
!(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
ret->d_fsdata = name;
name = NULL;
}
spin_unlock(&ret->d_lock);
out:
kfree(name);
nfs_free_fattr(fsinfo.fattr);
return ret;
}

791
fs/nfs/idmap.c Normal file
View file

@ -0,0 +1,791 @@
/*
* fs/nfs/idmap.c
*
* UID and GID to name mapping for clients.
*
* Copyright (c) 2002 The Regents of the University of Michigan.
* All rights reserved.
*
* Marius Aamodt Eriksen <marius@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/types.h>
#include <linux/parser.h>
#include <linux/fs.h>
#include <linux/nfs_idmap.h>
#include <net/net_namespace.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/key.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/module.h>
#include "internal.h"
#include "netns.h"
#include "nfs4trace.h"
#define NFS_UINT_MAXLEN 11
static const struct cred *id_resolver_cache;
static struct key_type key_type_id_resolver_legacy;
struct idmap_legacy_upcalldata {
struct rpc_pipe_msg pipe_msg;
struct idmap_msg idmap_msg;
struct key_construction *key_cons;
struct idmap *idmap;
};
struct idmap {
struct rpc_pipe_dir_object idmap_pdo;
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
};
/**
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
* @fattr: fully initialised struct nfs_fattr
* @owner_name: owner name string cache
* @group_name: group name string cache
*/
void nfs_fattr_init_names(struct nfs_fattr *fattr,
struct nfs4_string *owner_name,
struct nfs4_string *group_name)
{
fattr->owner_name = owner_name;
fattr->group_name = group_name;
}
static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
{
fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
kfree(fattr->owner_name->data);
}
static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
{
fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
kfree(fattr->group_name->data);
}
static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
{
struct nfs4_string *owner = fattr->owner_name;
kuid_t uid;
if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
return false;
if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
fattr->uid = uid;
fattr->valid |= NFS_ATTR_FATTR_OWNER;
}
return true;
}
static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
{
struct nfs4_string *group = fattr->group_name;
kgid_t gid;
if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
return false;
if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
fattr->gid = gid;
fattr->valid |= NFS_ATTR_FATTR_GROUP;
}
return true;
}
/**
* nfs_fattr_free_names - free up the NFSv4 owner and group strings
* @fattr: a fully initialised nfs_fattr structure
*/
void nfs_fattr_free_names(struct nfs_fattr *fattr)
{
if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
nfs_fattr_free_owner_name(fattr);
if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
nfs_fattr_free_group_name(fattr);
}
/**
* nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
* @server: pointer to the filesystem nfs_server structure
* @fattr: a fully initialised nfs_fattr structure
*
* This helper maps the cached NFSv4 owner/group strings in fattr into
* their numeric uid/gid equivalents, and then frees the cached strings.
*/
void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
{
if (nfs_fattr_map_owner_name(server, fattr))
nfs_fattr_free_owner_name(fattr);
if (nfs_fattr_map_group_name(server, fattr))
nfs_fattr_free_group_name(fattr);
}
static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
{
unsigned long val;
char buf[16];
if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
return 0;
memcpy(buf, name, namelen);
buf[namelen] = '\0';
if (kstrtoul(buf, 0, &val) != 0)
return 0;
*res = val;
return 1;
}
static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
{
return snprintf(buf, buflen, "%u", id);
}
static struct key_type key_type_id_resolver = {
.name = "id_resolver",
.preparse = user_preparse,
.free_preparse = user_free_preparse,
.instantiate = generic_key_instantiate,
.revoke = user_revoke,
.destroy = user_destroy,
.describe = user_describe,
.read = user_read,
};
static int nfs_idmap_init_keyring(void)
{
struct cred *cred;
struct key *keyring;
int ret = 0;
printk(KERN_NOTICE "NFS: Registering the %s key type\n",
key_type_id_resolver.name);
cred = prepare_kernel_cred(NULL);
if (!cred)
return -ENOMEM;
keyring = keyring_alloc(".id_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
(KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto failed_put_cred;
}
ret = register_key_type(&key_type_id_resolver);
if (ret < 0)
goto failed_put_key;
ret = register_key_type(&key_type_id_resolver_legacy);
if (ret < 0)
goto failed_reg_legacy;
set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
cred->thread_keyring = keyring;
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
id_resolver_cache = cred;
return 0;
failed_reg_legacy:
unregister_key_type(&key_type_id_resolver);
failed_put_key:
key_put(keyring);
failed_put_cred:
put_cred(cred);
return ret;
}
static void nfs_idmap_quit_keyring(void)
{
key_revoke(id_resolver_cache->thread_keyring);
unregister_key_type(&key_type_id_resolver);
unregister_key_type(&key_type_id_resolver_legacy);
put_cred(id_resolver_cache);
}
/*
* Assemble the description to pass to request_key()
* This function will allocate a new string and update dest to point
* at it. The caller is responsible for freeing dest.
*
* On error 0 is returned. Otherwise, the length of dest is returned.
*/
static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
const char *type, size_t typelen, char **desc)
{
char *cp;
size_t desclen = typelen + namelen + 2;
*desc = kmalloc(desclen, GFP_KERNEL);
if (!*desc)
return -ENOMEM;
cp = *desc;
memcpy(cp, type, typelen);
cp += typelen;
*cp++ = ':';
memcpy(cp, name, namelen);
cp += namelen;
*cp = '\0';
return desclen;
}
static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
const char *type, struct idmap *idmap)
{
char *desc;
struct key *rkey;
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
if (ret <= 0)
return ERR_PTR(ret);
rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
desc, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
if (!IS_ERR(rkey))
set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
kfree(desc);
return rkey;
}
static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
const char *type, void *data,
size_t data_size, struct idmap *idmap)
{
const struct cred *saved_cred;
struct key *rkey;
struct user_key_payload *payload;
ssize_t ret;
saved_cred = override_creds(id_resolver_cache);
rkey = nfs_idmap_request_key(name, namelen, type, idmap);
revert_creds(saved_cred);
if (IS_ERR(rkey)) {
ret = PTR_ERR(rkey);
goto out;
}
rcu_read_lock();
rkey->perm |= KEY_USR_VIEW;
ret = key_validate(rkey);
if (ret < 0)
goto out_up;
payload = rcu_dereference(rkey->payload.rcudata);
if (IS_ERR_OR_NULL(payload)) {
ret = PTR_ERR(payload);
goto out_up;
}
ret = payload->datalen;
if (ret > 0 && ret <= data_size)
memcpy(data, payload->data, ret);
else
ret = -EINVAL;
out_up:
rcu_read_unlock();
key_put(rkey);
out:
return ret;
}
/* ID -> Name */
static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
size_t buflen, struct idmap *idmap)
{
char id_str[NFS_UINT_MAXLEN];
int id_len;
ssize_t ret;
id_len = snprintf(id_str, sizeof(id_str), "%u", id);
ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
if (ret < 0)
return -EINVAL;
return ret;
}
/* Name -> ID */
static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
__u32 *id, struct idmap *idmap)
{
char id_str[NFS_UINT_MAXLEN];
long id_long;
ssize_t data_size;
int ret = 0;
data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
if (data_size <= 0) {
ret = -EINVAL;
} else {
ret = kstrtol(id_str, 10, &id_long);
*id = (__u32)id_long;
}
return ret;
}
/* idmap classic begins here */
enum {
Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
};
static const match_table_t nfs_idmap_tokens = {
{ Opt_find_uid, "uid:%s" },
{ Opt_find_gid, "gid:%s" },
{ Opt_find_user, "user:%s" },
{ Opt_find_group, "group:%s" },
{ Opt_find_err, NULL }
};
static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
static void idmap_release_pipe(struct inode *);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static const struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
.release_pipe = idmap_release_pipe,
.destroy_msg = idmap_pipe_destroy_msg,
};
static struct key_type key_type_id_resolver_legacy = {
.name = "id_legacy",
.preparse = user_preparse,
.free_preparse = user_free_preparse,
.instantiate = generic_key_instantiate,
.revoke = user_revoke,
.destroy = user_destroy,
.describe = user_describe,
.read = user_read,
.request_key = nfs_idmap_legacy_upcall,
};
static void nfs_idmap_pipe_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
struct rpc_pipe *pipe = idmap->idmap_pipe;
if (pipe->dentry) {
rpc_unlink(pipe->dentry);
pipe->dentry = NULL;
}
}
static int nfs_idmap_pipe_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
struct rpc_pipe *pipe = idmap->idmap_pipe;
struct dentry *dentry;
dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
pipe->dentry = dentry;
return 0;
}
static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
.create = nfs_idmap_pipe_create,
.destroy = nfs_idmap_pipe_destroy,
};
int
nfs_idmap_new(struct nfs_client *clp)
{
struct idmap *idmap;
struct rpc_pipe *pipe;
int error;
idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
if (idmap == NULL)
return -ENOMEM;
rpc_init_pipe_dir_object(&idmap->idmap_pdo,
&nfs_idmap_pipe_dir_object_ops,
idmap);
pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
if (IS_ERR(pipe)) {
error = PTR_ERR(pipe);
goto err;
}
idmap->idmap_pipe = pipe;
mutex_init(&idmap->idmap_mutex);
error = rpc_add_pipe_dir_object(clp->cl_net,
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
if (error)
goto err_destroy_pipe;
clp->cl_idmap = idmap;
return 0;
err_destroy_pipe:
rpc_destroy_pipe_data(idmap->idmap_pipe);
err:
kfree(idmap);
return error;
}
void
nfs_idmap_delete(struct nfs_client *clp)
{
struct idmap *idmap = clp->cl_idmap;
if (!idmap)
return;
clp->cl_idmap = NULL;
rpc_remove_pipe_dir_object(clp->cl_net,
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
rpc_destroy_pipe_data(idmap->idmap_pipe);
kfree(idmap);
}
int nfs_idmap_init(void)
{
int ret;
ret = nfs_idmap_init_keyring();
if (ret != 0)
goto out;
out:
return ret;
}
void nfs_idmap_quit(void)
{
nfs_idmap_quit_keyring();
}
static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
struct idmap_msg *im,
struct rpc_pipe_msg *msg)
{
substring_t substr;
int token, ret;
im->im_type = IDMAP_TYPE_GROUP;
token = match_token(desc, nfs_idmap_tokens, &substr);
switch (token) {
case Opt_find_uid:
im->im_type = IDMAP_TYPE_USER;
case Opt_find_gid:
im->im_conv = IDMAP_CONV_NAMETOID;
ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
break;
case Opt_find_user:
im->im_type = IDMAP_TYPE_USER;
case Opt_find_group:
im->im_conv = IDMAP_CONV_IDTONAME;
ret = match_int(&substr, &im->im_id);
break;
default:
ret = -EINVAL;
goto out;
}
msg->data = im;
msg->len = sizeof(struct idmap_msg);
out:
return ret;
}
static bool
nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
struct idmap_legacy_upcalldata *data)
{
if (idmap->idmap_upcall_data != NULL) {
WARN_ON_ONCE(1);
return false;
}
idmap->idmap_upcall_data = data;
return true;
}
static void
nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
{
struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
kfree(idmap->idmap_upcall_data);
idmap->idmap_upcall_data = NULL;
complete_request_key(cons, ret);
}
static void
nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
{
if (idmap->idmap_upcall_data != NULL)
nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
}
static int nfs_idmap_legacy_upcall(struct key_construction *cons,
const char *op,
void *aux)
{
struct idmap_legacy_upcalldata *data;
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
struct key *key = cons->key;
int ret = -ENOMEM;
/* msg and im are freed in idmap_pipe_destroy_msg */
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out1;
msg = &data->pipe_msg;
im = &data->idmap_msg;
data->idmap = idmap;
data->key_cons = cons;
ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
if (ret < 0)
goto out2;
ret = -EAGAIN;
if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
goto out2;
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
if (ret < 0)
nfs_idmap_abort_pipe_upcall(idmap, ret);
return ret;
out2:
kfree(data);
out1:
complete_request_key(cons, ret);
return ret;
}
static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
{
return key_instantiate_and_link(key, data, datalen,
id_resolver_cache->thread_keyring,
authkey);
}
static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
struct idmap_msg *upcall,
struct key *key, struct key *authkey)
{
char id_str[NFS_UINT_MAXLEN];
size_t len;
int ret = -ENOKEY;
/* ret = -ENOKEY */
if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
goto out;
switch (im->im_conv) {
case IDMAP_CONV_NAMETOID:
if (strcmp(upcall->im_name, im->im_name) != 0)
break;
/* Note: here we store the NUL terminator too */
len = sprintf(id_str, "%d", im->im_id) + 1;
ret = nfs_idmap_instantiate(key, authkey, id_str, len);
break;
case IDMAP_CONV_IDTONAME:
if (upcall->im_id != im->im_id)
break;
len = strlen(im->im_name);
ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
break;
default:
ret = -EINVAL;
}
out:
return ret;
}
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct rpc_inode *rpci = RPC_I(file_inode(filp));
struct idmap *idmap = (struct idmap *)rpci->private;
struct key_construction *cons;
struct idmap_msg im;
size_t namelen_in;
int ret = -ENOKEY;
/* If instantiation is successful, anyone waiting for key construction
* will have been woken up and someone else may now have used
* idmap_key_cons - so after this point we may no longer touch it.
*/
if (idmap->idmap_upcall_data == NULL)
goto out_noupcall;
cons = idmap->idmap_upcall_data->key_cons;
if (mlen != sizeof(im)) {
ret = -ENOSPC;
goto out;
}
if (copy_from_user(&im, src, mlen) != 0) {
ret = -EFAULT;
goto out;
}
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
ret = -ENOKEY;
goto out;
}
namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
ret = -EINVAL;
goto out;
}
ret = nfs_idmap_read_and_verify_message(&im,
&idmap->idmap_upcall_data->idmap_msg,
cons->key, cons->authkey);
if (ret >= 0) {
key_set_timeout(cons->key, nfs_idmap_cache_timeout);
ret = mlen;
}
out:
nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
out_noupcall:
return ret;
}
static void
idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct idmap_legacy_upcalldata *data = container_of(msg,
struct idmap_legacy_upcalldata,
pipe_msg);
struct idmap *idmap = data->idmap;
if (msg->errno)
nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
}
static void
idmap_release_pipe(struct inode *inode)
{
struct rpc_inode *rpci = RPC_I(inode);
struct idmap *idmap = (struct idmap *)rpci->private;
nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
}
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
__u32 id = -1;
int ret = 0;
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
if (ret == 0) {
*uid = make_kuid(&init_user_ns, id);
if (!uid_valid(*uid))
ret = -ERANGE;
}
trace_nfs4_map_name_to_uid(name, namelen, id, ret);
return ret;
}
int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
__u32 id = -1;
int ret = 0;
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
if (ret == 0) {
*gid = make_kgid(&init_user_ns, id);
if (!gid_valid(*gid))
ret = -ERANGE;
}
trace_nfs4_map_group_to_gid(name, namelen, id, ret);
return ret;
}
int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
__u32 id;
id = from_kuid(&init_user_ns, uid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
if (ret < 0)
ret = nfs_map_numeric_to_string(id, buf, buflen);
trace_nfs4_map_uid_to_name(buf, ret, id, ret);
return ret;
}
int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
__u32 id;
id = from_kgid(&init_user_ns, gid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
if (ret < 0)
ret = nfs_map_numeric_to_string(id, buf, buflen);
trace_nfs4_map_gid_to_group(buf, ret, id, ret);
return ret;
}

1970
fs/nfs/inode.c Normal file

File diff suppressed because it is too large Load diff

651
fs/nfs/internal.h Normal file
View file

@ -0,0 +1,651 @@
/*
* NFS internal definitions
*/
#include "nfs4_fs.h"
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/crc32.h>
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
struct nfs_string;
/* Maximum number of readahead requests
* FIXME: this should really be a sysctl so that users may tune it to suit
* their needs. People that do NFS over a slow network, might for
* instance want to reduce it to something closer to 1 for improved
* interactive response.
*/
#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
{
if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
}
static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
{
if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) ||
(((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) &&
((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0)))
return 0;
fattr->fileid = fattr->mounted_on_fileid;
return 1;
}
struct nfs_clone_mount {
const struct super_block *sb;
const struct dentry *dentry;
struct nfs_fh *fh;
struct nfs_fattr *fattr;
char *hostname;
char *mnt_path;
struct sockaddr *addr;
size_t addrlen;
rpc_authflavor_t authflavor;
};
/*
* Note: RFC 1813 doesn't limit the number of auth flavors that
* a server can return, so make something up.
*/
#define NFS_MAX_SECFLAVORS (12)
/*
* Value used if the user did not specify a port value.
*/
#define NFS_UNSPEC_PORT (-1)
/*
* Maximum number of pages that readdir can use for creating
* a vmapped array of pages.
*/
#define NFS_MAX_READDIR_PAGES 8
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname;
const struct sockaddr *addr;
size_t addrlen;
struct nfs_subversion *nfs_mod;
int proto;
u32 minorversion;
struct net *net;
};
/*
* In-kernel mount arguments
*/
struct nfs_parsed_mount_data {
int flags;
unsigned int rsize, wsize;
unsigned int timeo, retrans;
unsigned int acregmin, acregmax,
acdirmin, acdirmax;
unsigned int namlen;
unsigned int options;
unsigned int bsize;
struct nfs_auth_info auth_info;
rpc_authflavor_t selected_flavor;
char *client_address;
unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
bool need_mount;
struct {
struct sockaddr_storage address;
size_t addrlen;
char *hostname;
u32 version;
int port;
unsigned short protocol;
} mount_server;
struct {
struct sockaddr_storage address;
size_t addrlen;
char *hostname;
char *export_path;
int port;
unsigned short protocol;
} nfs_server;
struct security_mnt_opts lsm_opts;
struct net *net;
};
/* mount_clnt.c */
struct nfs_mount_request {
struct sockaddr *sap;
size_t salen;
char *hostname;
char *dirpath;
u32 version;
unsigned short protocol;
struct nfs_fh *fh;
int noresvport;
unsigned int *auth_flav_len;
rpc_authflavor_t *auth_flavs;
struct net *net;
};
struct nfs_mount_info {
void (*fill_super)(struct super_block *, struct nfs_mount_info *);
int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
struct nfs_parsed_mount_data *parsed;
struct nfs_clone_mount *cloned;
struct nfs_fh *mntfh;
};
extern int nfs_mount(struct nfs_mount_request *info);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern const struct rpc_program nfs_program;
extern void nfs_clients_init(struct net *net);
extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *);
int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t);
struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
const struct rpc_timeout *, const char *,
rpc_authflavor_t);
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
void nfs_server_remove_lists(struct nfs_server *);
void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
struct nfs_server *nfs_alloc_server(void);
void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *);
extern void nfs_cleanup_cb_ident_idr(struct net *);
extern void nfs_put_client(struct nfs_client *);
extern void nfs_free_client(struct nfs_client *);
extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_server(
struct nfs_mount_info *,
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
struct sockaddr *sap, size_t salen,
struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
struct nfs_fattr *,
rpc_authflavor_t);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
const struct sockaddr *ds_addr,
int ds_addrlen, int ds_proto,
unsigned int ds_timeo,
unsigned int ds_retrans);
extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
struct inode *);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
extern int nfs_fs_proc_net_init(struct net *net);
extern void nfs_fs_proc_net_exit(struct net *net);
#else
static inline int nfs_fs_proc_net_init(struct net *net)
{
return 0;
}
static inline void nfs_fs_proc_net_exit(struct net *net)
{
}
static inline int nfs_fs_proc_init(void)
{
return 0;
}
static inline void nfs_fs_proc_exit(void)
{
}
#endif
#ifdef CONFIG_NFS_V4_1
int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *);
#endif
/* callback_xdr.c */
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
struct nfs_pageio_descriptor;
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
extern int __init nfs_init_readpagecache(void);
extern void nfs_destroy_readpagecache(void);
extern int __init nfs_init_writepagecache(void);
extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
void nfs_pgio_header_free(struct nfs_pgio_header *);
void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
const struct rpc_call_ops *, int, int);
void nfs_free_request(struct nfs_page *req);
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
c->flags = 0;
atomic_set(&c->io_count, 0);
}
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
/* nfs3xdr.c */
extern struct rpc_procinfo nfs3_procedures[];
extern int nfs3_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
/* nfs4xdr.c */
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs4_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
#endif
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;
extern const u32 nfs41_maxwrite_overhead;
extern const u32 nfs41_maxgetdevinfo_overhead;
#endif
/* nfs4proc.c */
#if IS_ENABLED(CONFIG_NFS_V4)
extern struct rpc_procinfo nfs4_procedures[];
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
static inline void nfs4_label_free(struct nfs4_label *label)
{
if (label) {
kfree(label->label);
kfree(label);
}
return;
}
static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
{
if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL))
nfsi->cache_validity |= NFS_INO_INVALID_LABEL;
}
#else
static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
static inline void nfs4_label_free(void *label) {}
static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
{
}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
/* dir.c */
extern void nfs_force_use_readdirplus(struct inode *dir);
extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
int nfs_create(struct inode *, struct dentry *, umode_t, bool);
int nfs_mkdir(struct inode *, struct dentry *, umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
int nfs_symlink(struct inode *, struct dentry *, const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
/* file.c */
int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
loff_t nfs_file_llseek(struct file *, loff_t, int);
int nfs_file_flush(struct file *, fl_owner_t);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
int nfs_lock(struct file *, int, struct file_lock *);
int nfs_flock(struct file *, int, struct file_lock *);
int nfs_check_flags(int);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_destroy_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
/* super.c */
extern const struct super_operations nfs_sops;
extern struct file_system_type nfs_fs_type;
extern struct file_system_type nfs_xdev_fs_type;
#if IS_ENABLED(CONFIG_NFS_V4)
extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
void nfs_initialise_sb(struct super_block *);
int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
struct nfs_mount_info *, struct nfs_subversion *);
struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *);
struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
const char *, struct nfs_mount_info *);
void nfs_kill_super(struct super_block *);
void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
extern struct rpc_stat nfs_rpcstat;
extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern bool nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
/* namespace.c */
#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
const char *);
#if IS_ENABLED(CONFIG_NFS_V4)
extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
const char *);
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
#endif
struct nfs_pgio_completion_ops;
/* read.c */
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
void nfs_umount_begin(struct super_block *);
int nfs_statfs(struct dentry *, struct kstatfs *);
int nfs_show_options(struct seq_file *, struct dentry *);
int nfs_show_devname(struct seq_file *, struct dentry *);
int nfs_show_path(struct seq_file *, struct dentry *);
int nfs_show_stats(struct seq_file *, struct dentry *);
void nfs_put_super(struct super_block *);
int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_commit_free(struct nfs_commit_data *p);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
int how, int flags);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max);
unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
int nfs_scan_commit(struct inode *inode, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
int nfs_write_need_commit(struct nfs_pgio_header *);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
void nfs_commitdata_release(struct nfs_commit_data *data);
void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_request_remove_commit_list(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void nfs_init_cinfo(struct nfs_commit_info *cinfo,
struct inode *inode,
struct nfs_direct_req *dreq);
int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
#else
#define nfs_migrate_page NULL
#endif
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
struct dentry *old_dentry, struct dentry *new_dentry,
void (*complete)(struct rpc_task *, struct nfs_renamedata *));
extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
static inline void nfs_inode_dio_wait(struct inode *inode)
{
inode_dio_wait(inode);
}
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
extern int nfs40_walk_client_list(struct nfs_client *clp,
struct nfs_client **result,
struct rpc_cred *cred);
extern int nfs41_walk_client_list(struct nfs_client *clp,
struct nfs_client **result,
struct rpc_cred *cred);
static inline struct inode *nfs_igrab_and_active(struct inode *inode)
{
inode = igrab(inode);
if (inode != NULL && !nfs_sb_active(inode->i_sb)) {
iput(inode);
inode = NULL;
}
return inode;
}
static inline void nfs_iput_and_deactive(struct inode *inode)
{
if (inode != NULL) {
struct super_block *sb = inode->i_sb;
iput(inode);
nfs_sb_deactive(sb);
}
}
/*
* Determine the device name as a string
*/
static inline char *nfs_devname(struct dentry *dentry,
char *buffer, ssize_t buflen)
{
char *dummy;
return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
}
/*
* Determine the actual block size (and log2 thereof)
*/
static inline
unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
{
/* make sure blocksize is a power of two */
if ((bsize & (bsize - 1)) || nrbitsp) {
unsigned char nrbits;
for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
;
bsize = 1 << nrbits;
if (nrbitsp)
*nrbitsp = nrbits;
}
return bsize;
}
/*
* Calculate the number of 512byte blocks used.
*/
static inline blkcnt_t nfs_calc_block_size(u64 tsize)
{
blkcnt_t used = (tsize + 511) >> 9;
return (used > ULONG_MAX) ? ULONG_MAX : used;
}
/*
* Compute and set NFS server blocksize
*/
static inline
unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
{
if (bsize < NFS_MIN_FILE_IO_SIZE)
bsize = NFS_DEF_FILE_IO_SIZE;
else if (bsize >= NFS_MAX_FILE_IO_SIZE)
bsize = NFS_MAX_FILE_IO_SIZE;
return nfs_block_bits(bsize, nrbitsp);
}
/*
* Determine the maximum file size for a superblock
*/
static inline
void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
{
sb->s_maxbytes = (loff_t)maxfilesize;
if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
sb->s_maxbytes = MAX_LFS_FILESIZE;
}
/*
* Determine the number of bytes of data the page contains
*/
static inline
unsigned int nfs_page_length(struct page *page)
{
loff_t i_size = i_size_read(page_file_mapping(page)->host);
if (i_size > 0) {
pgoff_t page_index = page_file_index(page);
pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
if (page_index < end_index)
return PAGE_CACHE_SIZE;
if (page_index == end_index)
return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
}
return 0;
}
/*
* Convert a umode to a dirent->d_type
*/
static inline
unsigned char nfs_umode_to_dtype(umode_t mode)
{
return (mode >> 12) & 15;
}
/*
* Determine the number of pages in an array of length 'len' and
* with a base offset of 'base'
*/
static inline
unsigned int nfs_page_array_len(unsigned int base, size_t len)
{
return ((unsigned long)len + (unsigned long)base +
PAGE_SIZE - 1) >> PAGE_SHIFT;
}
/*
* Convert a struct timespec into a 64-bit change attribute
*
* This does approximately the same thing as timespec_to_ns(),
* but for calculation efficiency, we multiply the seconds by
* 1024*1024*1024.
*/
static inline
u64 nfs_timespec_to_change_attr(const struct timespec *ts)
{
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
#ifdef CONFIG_CRC32
/**
* nfs_fhandle_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
*
* returns a crc32 hash for the filehandle that is compatible with
* the one displayed by "wireshark".
*/
static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
}
#else
static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{
return 0;
}
#endif

71
fs/nfs/iostat.h Normal file
View file

@ -0,0 +1,71 @@
/*
* linux/fs/nfs/iostat.h
*
* Declarations for NFS client per-mount statistics
*
* Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
*
*/
#ifndef _NFS_IOSTAT
#define _NFS_IOSTAT
#include <linux/percpu.h>
#include <linux/cache.h>
#include <linux/nfs_iostat.h>
struct nfs_iostats {
unsigned long long bytes[__NFSIOS_BYTESMAX];
#ifdef CONFIG_NFS_FSCACHE
unsigned long long fscache[__NFSIOS_FSCACHEMAX];
#endif
unsigned long events[__NFSIOS_COUNTSMAX];
} ____cacheline_aligned;
static inline void nfs_inc_server_stats(const struct nfs_server *server,
enum nfs_stat_eventcounters stat)
{
this_cpu_inc(server->io_stats->events[stat]);
}
static inline void nfs_inc_stats(const struct inode *inode,
enum nfs_stat_eventcounters stat)
{
nfs_inc_server_stats(NFS_SERVER(inode), stat);
}
static inline void nfs_add_server_stats(const struct nfs_server *server,
enum nfs_stat_bytecounters stat,
long addend)
{
this_cpu_add(server->io_stats->bytes[stat], addend);
}
static inline void nfs_add_stats(const struct inode *inode,
enum nfs_stat_bytecounters stat,
long addend)
{
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
#ifdef CONFIG_NFS_FSCACHE
static inline void nfs_add_fscache_stats(struct inode *inode,
enum nfs_stat_fscachecounters stat,
long addend)
{
this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
}
#endif
static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
{
return alloc_percpu(struct nfs_iostats);
}
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
{
if (stats != NULL)
free_percpu(stats);
}
#endif /* _NFS_IOSTAT */

535
fs/nfs/mount_clnt.c Normal file
View file

@ -0,0 +1,535 @@
/*
* In-kernel MOUNT protocol client
*
* Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#ifdef NFS_DEBUG
# define NFSDBG_FACILITY NFSDBG_MOUNT
#endif
/*
* Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4
*/
#define MNTPATHLEN (1024)
/*
* XDR data type sizes
*/
#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN))
#define MNT_status_sz (1)
#define MNT_fhs_status_sz (1)
#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE)
#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE))
#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS)
/*
* XDR argument and result sizes
*/
#define MNT_enc_dirpath_sz encode_dirpath_sz
#define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz)
#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle_sz + \
MNT_authflav3_sz)
/*
* Defined by RFC 1094, section A.5
*/
enum {
MOUNTPROC_NULL = 0,
MOUNTPROC_MNT = 1,
MOUNTPROC_DUMP = 2,
MOUNTPROC_UMNT = 3,
MOUNTPROC_UMNTALL = 4,
MOUNTPROC_EXPORT = 5,
};
/*
* Defined by RFC 1813, section 5.2
*/
enum {
MOUNTPROC3_NULL = 0,
MOUNTPROC3_MNT = 1,
MOUNTPROC3_DUMP = 2,
MOUNTPROC3_UMNT = 3,
MOUNTPROC3_UMNTALL = 4,
MOUNTPROC3_EXPORT = 5,
};
static const struct rpc_program mnt_program;
/*
* Defined by OpenGroup XNFS Version 3W, chapter 8
*/
enum mountstat {
MNT_OK = 0,
MNT_EPERM = 1,
MNT_ENOENT = 2,
MNT_EACCES = 13,
MNT_EINVAL = 22,
};
static struct {
u32 status;
int errno;
} mnt_errtbl[] = {
{ .status = MNT_OK, .errno = 0, },
{ .status = MNT_EPERM, .errno = -EPERM, },
{ .status = MNT_ENOENT, .errno = -ENOENT, },
{ .status = MNT_EACCES, .errno = -EACCES, },
{ .status = MNT_EINVAL, .errno = -EINVAL, },
};
/*
* Defined by RFC 1813, section 5.1.5
*/
enum mountstat3 {
MNT3_OK = 0, /* no error */
MNT3ERR_PERM = 1, /* Not owner */
MNT3ERR_NOENT = 2, /* No such file or directory */
MNT3ERR_IO = 5, /* I/O error */
MNT3ERR_ACCES = 13, /* Permission denied */
MNT3ERR_NOTDIR = 20, /* Not a directory */
MNT3ERR_INVAL = 22, /* Invalid argument */
MNT3ERR_NAMETOOLONG = 63, /* Filename too long */
MNT3ERR_NOTSUPP = 10004, /* Operation not supported */
MNT3ERR_SERVERFAULT = 10006, /* A failure on the server */
};
static struct {
u32 status;
int errno;
} mnt3_errtbl[] = {
{ .status = MNT3_OK, .errno = 0, },
{ .status = MNT3ERR_PERM, .errno = -EPERM, },
{ .status = MNT3ERR_NOENT, .errno = -ENOENT, },
{ .status = MNT3ERR_IO, .errno = -EIO, },
{ .status = MNT3ERR_ACCES, .errno = -EACCES, },
{ .status = MNT3ERR_NOTDIR, .errno = -ENOTDIR, },
{ .status = MNT3ERR_INVAL, .errno = -EINVAL, },
{ .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, },
{ .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, },
{ .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, },
};
struct mountres {
int errno;
struct nfs_fh *fh;
unsigned int *auth_count;
rpc_authflavor_t *auth_flavors;
};
struct mnt_fhstatus {
u32 status;
struct nfs_fh *fh;
};
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
*
* Uses default timeout parameters specified by underlying transport. On
* successful return, the auth_flavs list and auth_flav_len will be populated
* with the list from the server or a faked-up list if the server didn't
* provide one.
*/
int nfs_mount(struct nfs_mount_request *info)
{
struct mountres result = {
.fh = info->fh,
.auth_count = info->auth_flav_len,
.auth_flavors = info->auth_flavs,
};
struct rpc_message msg = {
.rpc_argp = info->dirpath,
.rpc_resp = &result,
};
struct rpc_create_args args = {
.net = info->net,
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
.servername = info->hostname,
.program = &mnt_program,
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
};
struct rpc_clnt *mnt_clnt;
int status;
dprintk("NFS: sending MNT request for %s:%s\n",
(info->hostname ? info->hostname : "server"),
info->dirpath);
if (strlen(info->dirpath) > MNTPATHLEN)
return -ENAMETOOLONG;
if (info->noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
mnt_clnt = rpc_create(&args);
if (IS_ERR(mnt_clnt))
goto out_clnt_err;
if (info->version == NFS_MNT3_VERSION)
msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
else
msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
rpc_shutdown_client(mnt_clnt);
if (status < 0)
goto out_call_err;
if (result.errno != 0)
goto out_mnt_err;
dprintk("NFS: MNT request succeeded\n");
status = 0;
/*
* If the server didn't provide a flavor list, allow the
* client to try any flavor.
*/
if (info->version != NFS_MNT3_VERSION || *info->auth_flav_len == 0) {
dprintk("NFS: Faking up auth_flavs list\n");
info->auth_flavs[0] = RPC_AUTH_NULL;
*info->auth_flav_len = 1;
}
out:
return status;
out_clnt_err:
status = PTR_ERR(mnt_clnt);
dprintk("NFS: failed to create MNT RPC client, status=%d\n", status);
goto out;
out_call_err:
dprintk("NFS: MNT request failed, status=%d\n", status);
goto out;
out_mnt_err:
dprintk("NFS: MNT server returned result %d\n", result.errno);
status = result.errno;
goto out;
}
/**
* nfs_umount - Notify a server that we have unmounted this export
* @info: pointer to umount request arguments
*
* MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
* use UDP.
*/
void nfs_umount(const struct nfs_mount_request *info)
{
static const struct rpc_timeout nfs_umnt_timeout = {
.to_initval = 1 * HZ,
.to_maxval = 3 * HZ,
.to_retries = 2,
};
struct rpc_create_args args = {
.net = info->net,
.protocol = IPPROTO_UDP,
.address = info->sap,
.addrsize = info->salen,
.timeout = &nfs_umnt_timeout,
.servername = info->hostname,
.program = &mnt_program,
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
};
struct rpc_message msg = {
.rpc_argp = info->dirpath,
};
struct rpc_clnt *clnt;
int status;
if (strlen(info->dirpath) > MNTPATHLEN)
return;
if (info->noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
clnt = rpc_create(&args);
if (IS_ERR(clnt))
goto out_clnt_err;
dprintk("NFS: sending UMNT request for %s:%s\n",
(info->hostname ? info->hostname : "server"), info->dirpath);
if (info->version == NFS_MNT3_VERSION)
msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
else
msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
status = rpc_call_sync(clnt, &msg, 0);
rpc_shutdown_client(clnt);
if (unlikely(status < 0))
goto out_call_err;
return;
out_clnt_err:
dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
PTR_ERR(clnt));
return;
out_call_err:
dprintk("NFS: UMNT request failed, status=%d\n", status);
}
/*
* XDR encode/decode functions for MOUNT
*/
static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
{
const u32 pathname_len = strlen(pathname);
__be32 *p;
p = xdr_reserve_space(xdr, 4 + pathname_len);
xdr_encode_opaque(p, pathname, pathname_len);
}
static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
const char *dirpath)
{
encode_mntdirpath(xdr, dirpath);
}
/*
* RFC 1094: "A non-zero status indicates some sort of error. In this
* case, the status is a UNIX error number." This can be problematic
* if the server and client use different errno values for the same
* error.
*
* However, the OpenGroup XNFS spec provides a simple mapping that is
* independent of local errno values on the server and the client.
*/
static int decode_status(struct xdr_stream *xdr, struct mountres *res)
{
unsigned int i;
u32 status;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
if (mnt_errtbl[i].status == status) {
res->errno = mnt_errtbl[i].errno;
return 0;
}
}
dprintk("NFS: unrecognized MNT status code: %u\n", status);
res->errno = -EACCES;
return 0;
}
static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
{
struct nfs_fh *fh = res->fh;
__be32 *p;
p = xdr_inline_decode(xdr, NFS2_FHSIZE);
if (unlikely(p == NULL))
return -EIO;
fh->size = NFS2_FHSIZE;
memcpy(fh->data, p, NFS2_FHSIZE);
return 0;
}
static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct mountres *res)
{
int status;
status = decode_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
return decode_fhandle(xdr, res);
}
static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
{
unsigned int i;
u32 status;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
if (mnt3_errtbl[i].status == status) {
res->errno = mnt3_errtbl[i].errno;
return 0;
}
}
dprintk("NFS: unrecognized MNT3 status code: %u\n", status);
res->errno = -EACCES;
return 0;
}
static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
{
struct nfs_fh *fh = res->fh;
u32 size;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
size = be32_to_cpup(p);
if (size > NFS3_FHSIZE || size == 0)
return -EIO;
p = xdr_inline_decode(xdr, size);
if (unlikely(p == NULL))
return -EIO;
fh->size = size;
memcpy(fh->data, p, size);
return 0;
}
static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
{
rpc_authflavor_t *flavors = res->auth_flavors;
unsigned int *count = res->auth_count;
u32 entries, i;
__be32 *p;
if (*count == 0)
return 0;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
entries = be32_to_cpup(p);
dprintk("NFS: received %u auth flavors\n", entries);
if (entries > NFS_MAX_SECFLAVORS)
entries = NFS_MAX_SECFLAVORS;
p = xdr_inline_decode(xdr, 4 * entries);
if (unlikely(p == NULL))
return -EIO;
if (entries > *count)
entries = *count;
for (i = 0; i < entries; i++) {
flavors[i] = be32_to_cpup(p++);
dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
}
*count = i;
return 0;
}
static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct mountres *res)
{
int status;
status = decode_fhs_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
status = decode_fhandle3(xdr, res);
if (unlikely(status != 0)) {
res->errno = -EBADHANDLE;
return 0;
}
return decode_auth_flavors(xdr, res);
}
static struct rpc_procinfo mnt_procedures[] = {
[MOUNTPROC_MNT] = {
.p_proc = MOUNTPROC_MNT,
.p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres_sz,
.p_statidx = MOUNTPROC_MNT,
.p_name = "MOUNT",
},
[MOUNTPROC_UMNT] = {
.p_proc = MOUNTPROC_UMNT,
.p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC_UMNT,
.p_name = "UMOUNT",
},
};
static struct rpc_procinfo mnt3_procedures[] = {
[MOUNTPROC3_MNT] = {
.p_proc = MOUNTPROC3_MNT,
.p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres3,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres3_sz,
.p_statidx = MOUNTPROC3_MNT,
.p_name = "MOUNT",
},
[MOUNTPROC3_UMNT] = {
.p_proc = MOUNTPROC3_UMNT,
.p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC3_UMNT,
.p_name = "UMOUNT",
},
};
static const struct rpc_version mnt_version1 = {
.number = 1,
.nrprocs = ARRAY_SIZE(mnt_procedures),
.procs = mnt_procedures,
};
static const struct rpc_version mnt_version3 = {
.number = 3,
.nrprocs = ARRAY_SIZE(mnt3_procedures),
.procs = mnt3_procedures,
};
static const struct rpc_version *mnt_version[] = {
NULL,
&mnt_version1,
NULL,
&mnt_version3,
};
static struct rpc_stat mnt_stats;
static const struct rpc_program mnt_program = {
.name = "mount",
.number = NFS_MNT_PROGRAM,
.nrvers = ARRAY_SIZE(mnt_version),
.version = mnt_version,
.stats = &mnt_stats,
};

289
fs/nfs/namespace.c Normal file
View file

@ -0,0 +1,289 @@
/*
* linux/fs/nfs/namespace.c
*
* Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
* - Modified by David Howells <dhowells@redhat.com>
*
* NFS namespace
*/
#include <linux/module.h>
#include <linux/dcache.h>
#include <linux/gfp.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/nfs_fs.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
#include <linux/vfs.h>
#include <linux/sunrpc/gss_api.h>
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static void nfs_expire_automounts(struct work_struct *work);
static LIST_HEAD(nfs_automount_list);
static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
* @dentry - pointer to dentry
* @buffer - result buffer
* @buflen - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
* by arbitrary hashed dentry.
*
* This is mainly for use in figuring out the path on the
* server side when automounting on top of an existing partition
* and in generating /proc/mounts and friends.
*
* Supported flags:
* NFS_PATH_CANONICAL: ensure there is exactly one slash after
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
rename_retry:
end = buffer+buflen;
*--end = '\0';
buflen--;
seq = read_seqbegin(&rename_lock);
rcu_read_lock();
while (1) {
spin_lock(&dentry->d_lock);
if (IS_ROOT(dentry))
break;
namelen = dentry->d_name.len;
buflen -= namelen + 1;
if (buflen < 0)
goto Elong_unlock;
end -= namelen;
memcpy(end, dentry->d_name.name, namelen);
*--end = '/';
spin_unlock(&dentry->d_lock);
dentry = dentry->d_parent;
}
if (read_seqretry(&rename_lock, seq)) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
goto rename_retry;
}
if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
if (--buflen < 0) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
goto Elong;
}
*--end = '/';
}
*p = end;
base = dentry->d_fsdata;
if (!base) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
WARN_ON(1);
return end;
}
namelen = strlen(base);
if (flags & NFS_PATH_CANONICAL) {
/* Strip off excess slashes in base string */
while (namelen > 0 && base[namelen - 1] == '/')
namelen--;
}
buflen -= namelen;
if (buflen < 0) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
goto Elong;
}
end -= namelen;
memcpy(end, base, namelen);
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
return end;
Elong_unlock:
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
if (read_seqretry(&rename_lock, seq))
goto rename_retry;
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
EXPORT_SYMBOL_GPL(nfs_path);
/*
* nfs_d_automount - Handle crossing a mountpoint on the server
* @path - The mountpoint
*
* When we encounter a mountpoint on the server, we want to set up
* a mountpoint on the client too, to prevent inode numbers from
* colliding, and to allow "df" to work properly.
* On NFSv4, we also want to allow for the fact that different
* filesystems may be migrated to different servers in a failover
* situation, and that different filesystems may want to use
* different security flavours.
*/
struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
dprintk("--> nfs_d_automount()\n");
mnt = ERR_PTR(-ESTALE);
if (IS_ROOT(path->dentry))
goto out_nofree;
mnt = ERR_PTR(-ENOMEM);
fh = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
if (fh == NULL || fattr == NULL)
goto out;
dprintk("%s: enter\n", __func__);
mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
if (IS_ERR(mnt))
goto out;
dprintk("%s: done, success\n", __func__);
mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
out_nofree:
if (IS_ERR(mnt))
dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
else
dprintk("<-- %s() = %p\n", __func__, mnt);
return mnt;
}
static int
nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
if (NFS_FH(dentry->d_inode)->size != 0)
return nfs_getattr(mnt, dentry, stat);
generic_fillattr(dentry->d_inode, stat);
return 0;
}
static int
nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
{
if (NFS_FH(dentry->d_inode)->size != 0)
return nfs_setattr(dentry, attr);
return -EACCES;
}
const struct inode_operations nfs_mountpoint_inode_operations = {
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
const struct inode_operations nfs_referral_inode_operations = {
.getattr = nfs_namespace_getattr,
.setattr = nfs_namespace_setattr,
};
static void nfs_expire_automounts(struct work_struct *work)
{
struct list_head *list = &nfs_automount_list;
mark_mounts_for_expiry(list);
if (!list_empty(list))
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
}
void nfs_release_automount_timer(void)
{
if (list_empty(&nfs_automount_list))
cancel_delayed_work(&nfs_automount_task);
}
/*
* Clone a mountpoint of the appropriate type
*/
static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
}
/**
* nfs_do_submount - set up mountpoint when crossing a filesystem boundary
* @dentry - parent directory
* @fh - filehandle for new root dentry
* @fattr - attributes for new root inode
* @authflavor - security flavor to use when performing the mount
*
*/
struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
{
struct nfs_clone_mount mountdata = {
.sb = dentry->d_sb,
.dentry = dentry,
.fh = fh,
.fattr = fattr,
.authflavor = authflavor,
};
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
char *page = (char *) __get_free_page(GFP_USER);
char *devname;
dprintk("--> nfs_do_submount()\n");
dprintk("%s: submounting on %pd2\n", __func__,
dentry);
if (page == NULL)
goto out;
devname = nfs_devname(dentry, page, PAGE_SIZE);
mnt = (struct vfsmount *)devname;
if (IS_ERR(devname))
goto free_page;
mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
free_page:
free_page((unsigned long)page);
out:
dprintk("%s: done\n", __func__);
dprintk("<-- nfs_do_submount() = %p\n", mnt);
return mnt;
}
EXPORT_SYMBOL_GPL(nfs_do_submount);
struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
int err;
struct dentry *parent = dget_parent(dentry);
/* Look it up again to get its attributes */
err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL);
dput(parent);
if (err != 0)
return ERR_PTR(err);
return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
}
EXPORT_SYMBOL_GPL(nfs_submount);

40
fs/nfs/netns.h Normal file
View file

@ -0,0 +1,40 @@
/*
* NFS-private data for each "struct net". Accessed with net_generic().
*/
#ifndef __NFS_NETNS_H__
#define __NFS_NETNS_H__
#include <linux/nfs4.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
struct bl_dev_msg {
int32_t status;
uint32_t major, minor;
};
struct nfs_net {
struct cache_detail *nfs_dns_resolve;
struct rpc_pipe *bl_device_pipe;
struct bl_dev_msg bl_mount_reply;
wait_queue_head_t bl_wq;
struct mutex bl_mutex;
struct list_head nfs_client_list;
struct list_head nfs_volume_list;
#if IS_ENABLED(CONFIG_NFS_V4)
struct idr cb_ident_idr; /* Protected by nfs_client_lock */
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
int cb_users[NFS4_MAX_MINOR_VERSION + 1];
#endif
spinlock_t nfs_client_lock;
struct timespec boot_time;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_nfsfs;
#endif
};
extern int nfs_net_id;
#endif

29
fs/nfs/nfs.h Normal file
View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2012 Netapp, Inc. All rights reserved.
*
* Function and structures exported by the NFS module
* for use by NFS version-specific modules.
*/
#ifndef __LINUX_INTERNAL_NFS_H
#define __LINUX_INTERNAL_NFS_H
#include <linux/fs.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs_xdr.h>
struct nfs_subversion {
struct module *owner; /* THIS_MODULE pointer */
struct file_system_type *nfs_fs; /* NFS filesystem type */
const struct rpc_version *rpc_vers; /* NFS version information */
const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
const struct super_operations *sops; /* NFS Super operations */
const struct xattr_handler **xattr; /* NFS xattr handlers */
struct list_head list; /* List of NFS versions */
};
struct nfs_subversion *get_nfs_version(unsigned int);
void put_nfs_version(struct nfs_subversion *);
void register_nfs_version(struct nfs_subversion *);
void unregister_nfs_version(struct nfs_subversion *);
#endif /* __LINUX_INTERNAL_NFS_H */

31
fs/nfs/nfs2super.c Normal file
View file

@ -0,0 +1,31 @@
/*
* Copyright (c) 2012 Netapp, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#include "nfs.h"
static struct nfs_subversion nfs_v2 = {
.owner = THIS_MODULE,
.nfs_fs = &nfs_fs_type,
.rpc_vers = &nfs_version2,
.rpc_ops = &nfs_v2_clientops,
.sops = &nfs_sops,
};
static int __init init_nfs_v2(void)
{
register_nfs_version(&nfs_v2);
return 0;
}
static void __exit exit_nfs_v2(void)
{
unregister_nfs_version(&nfs_v2);
}
MODULE_LICENSE("GPL");
module_init(init_nfs_v2);
module_exit(exit_nfs_v2);

1147
fs/nfs/nfs2xdr.c Normal file

File diff suppressed because it is too large Load diff

34
fs/nfs/nfs3_fs.h Normal file
View file

@ -0,0 +1,34 @@
/*
* Copyright (C) 2014 Anna Schumaker.
*
* NFSv3-specific filesystem definitions and declarations
*/
#ifndef __LINUX_FS_NFS_NFS3_FS_H
#define __LINUX_FS_NFS_NFS3_FS_H
/*
* nfs3acl.c
*/
#ifdef CONFIG_NFS_V3_ACL
extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl);
extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
extern const struct xattr_handler *nfs3_xattr_handlers[];
#else
static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl)
{
return 0;
}
#define nfs3_listxattr NULL
#endif /* CONFIG_NFS_V3_ACL */
/* nfs3client.c */
struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
#endif /* __LINUX_FS_NFS_NFS3_FS_H */

296
fs/nfs/nfs3acl.c Normal file
View file

@ -0,0 +1,296 @@
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/posix_acl_xattr.h>
#include <linux/nfsacl.h>
#include "internal.h"
#include "nfs3_fs.h"
#define NFSDBG_FACILITY NFSDBG_PROC
struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct page *pages[NFSACL_MAXPAGES] = { };
struct nfs3_getaclargs args = {
.fh = NFS_FH(inode),
/* The xdr layer may allocate pages here. */
.pages = pages,
};
struct nfs3_getaclres res = {
NULL,
};
struct rpc_message msg = {
.rpc_argp = &args,
.rpc_resp = &res,
};
int status, count;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
return ERR_PTR(-EOPNOTSUPP);
status = nfs_revalidate_inode(server, inode);
if (status < 0)
return ERR_PTR(status);
/*
* Only get the access acl when explicitly requested: We don't
* need it for access decisions, and only some applications use
* it. Applications which request the access acl first are not
* penalized from this optimization.
*/
if (type == ACL_TYPE_ACCESS)
args.mask |= NFS_ACLCNT|NFS_ACL;
if (S_ISDIR(inode->i_mode))
args.mask |= NFS_DFACLCNT|NFS_DFACL;
if (args.mask == 0)
return NULL;
dprintk("NFS call getacl\n");
msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return ERR_PTR(-ENOMEM);
status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
__free_page(args.pages[count]);
switch (status) {
case 0:
status = nfs_refresh_inode(inode, res.fattr);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
dprintk("NFS_V3_ACL extension not supported; disabling\n");
server->caps &= ~NFS_CAP_ACLS;
case -ENOTSUPP:
status = -EOPNOTSUPP;
default:
goto getout;
}
if ((args.mask & res.mask) != args.mask) {
status = -EIO;
goto getout;
}
if (res.acl_access != NULL) {
if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) ||
res.acl_access->a_count == 0) {
posix_acl_release(res.acl_access);
res.acl_access = NULL;
}
}
if (res.mask & NFS_ACL)
set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access);
else
forget_cached_acl(inode, ACL_TYPE_ACCESS);
if (res.mask & NFS_DFACL)
set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default);
else
forget_cached_acl(inode, ACL_TYPE_DEFAULT);
nfs_free_fattr(res.fattr);
if (type == ACL_TYPE_ACCESS) {
posix_acl_release(res.acl_default);
return res.acl_access;
} else {
posix_acl_release(res.acl_access);
return res.acl_default;
}
getout:
posix_acl_release(res.acl_access);
posix_acl_release(res.acl_default);
nfs_free_fattr(res.fattr);
return ERR_PTR(status);
}
static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_fattr *fattr;
struct page *pages[NFSACL_MAXPAGES];
struct nfs3_setaclargs args = {
.inode = inode,
.mask = NFS_ACL,
.acl_access = acl,
.pages = pages,
};
struct rpc_message msg = {
.rpc_argp = &args,
.rpc_resp = &fattr,
};
int status = 0;
if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
goto out;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
goto out;
/* We are doing this here because XDR marshalling does not
* return any results, it BUGs. */
status = -ENOSPC;
if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
goto out;
if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES)
goto out;
if (S_ISDIR(inode->i_mode)) {
args.mask |= NFS_DFACL;
args.acl_default = dfacl;
args.len = nfsacl_size(acl, dfacl);
} else
args.len = nfsacl_size(acl, NULL);
if (args.len > NFS_ACL_INLINE_BUFSIZE) {
unsigned int npages = 1 + ((args.len - 1) >> PAGE_SHIFT);
status = -ENOMEM;
do {
args.pages[args.npages] = alloc_page(GFP_KERNEL);
if (args.pages[args.npages] == NULL)
goto out_freepages;
args.npages++;
} while (args.npages < npages);
}
dprintk("NFS call setacl\n");
status = -ENOMEM;
fattr = nfs_alloc_fattr();
if (fattr == NULL)
goto out_freepages;
msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
msg.rpc_resp = fattr;
status = rpc_call_sync(server->client_acl, &msg, 0);
nfs_access_zap_cache(inode);
nfs_zap_acl_cache(inode);
dprintk("NFS reply setacl: %d\n", status);
switch (status) {
case 0:
status = nfs_refresh_inode(inode, fattr);
set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
dprintk("NFS_V3_ACL SETACL RPC not supported"
"(will not retry)\n");
server->caps &= ~NFS_CAP_ACLS;
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
nfs_free_fattr(fattr);
out_freepages:
while (args.npages != 0) {
args.npages--;
__free_page(args.pages[args.npages]);
}
out:
return status;
}
int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl)
{
int ret;
ret = __nfs3_proc_setacls(inode, acl, dfacl);
return (ret == -EOPNOTSUPP) ? 0 : ret;
}
int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct posix_acl *alloc = NULL, *dfacl = NULL;
int status;
if (S_ISDIR(inode->i_mode)) {
switch(type) {
case ACL_TYPE_ACCESS:
alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(alloc))
goto fail;
break;
case ACL_TYPE_DEFAULT:
dfacl = acl;
alloc = acl = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(alloc))
goto fail;
break;
}
}
if (acl == NULL) {
alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(alloc))
goto fail;
}
status = __nfs3_proc_setacls(inode, acl, dfacl);
posix_acl_release(alloc);
return status;
fail:
return PTR_ERR(alloc);
}
const struct xattr_handler *nfs3_xattr_handlers[] = {
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
NULL,
};
static int
nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
size_t size, ssize_t *result)
{
struct posix_acl *acl;
char *p = data + *result;
acl = get_acl(inode, type);
if (IS_ERR_OR_NULL(acl))
return 0;
posix_acl_release(acl);
*result += strlen(name);
*result += 1;
if (!size)
return 0;
if (*result > size)
return -ERANGE;
strcpy(p, name);
return 0;
}
ssize_t
nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
{
struct inode *inode = dentry->d_inode;
ssize_t result = 0;
int error;
error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
POSIX_ACL_XATTR_ACCESS, data, size, &result);
if (error)
return error;
error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
POSIX_ACL_XATTR_DEFAULT, data, size, &result);
if (error)
return error;
return result;
}

66
fs/nfs/nfs3client.c Normal file
View file

@ -0,0 +1,66 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include "internal.h"
#include "nfs3_fs.h"
#ifdef CONFIG_NFS_V3_ACL
static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
static const struct rpc_version *nfsacl_version[] = {
[3] = &nfsacl_version3,
};
const struct rpc_program nfsacl_program = {
.name = "nfsacl",
.number = NFS_ACL_PROGRAM,
.nrvers = ARRAY_SIZE(nfsacl_version),
.version = nfsacl_version,
.stats = &nfsacl_rpcstat,
};
/*
* Initialise an NFSv3 ACL client connection
*/
static void nfs_init_server_aclclient(struct nfs_server *server)
{
if (server->flags & NFS_MOUNT_NOACL)
goto out_noacl;
server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
if (IS_ERR(server->client_acl))
goto out_noacl;
/* No errors! Assume that Sun nfsacls are supported */
server->caps |= NFS_CAP_ACLS;
return;
out_noacl:
server->caps &= ~NFS_CAP_ACLS;
}
#else
static inline void nfs_init_server_aclclient(struct nfs_server *server)
{
server->flags &= ~NFS_MOUNT_NOACL;
server->caps &= ~NFS_CAP_ACLS;
}
#endif
struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,
struct nfs_subversion *nfs_mod)
{
struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);
/* Create a client RPC handle for the NFS v3 ACL management interface */
if (!IS_ERR(server))
nfs_init_server_aclclient(server);
return server;
}
struct nfs_server *nfs3_clone_server(struct nfs_server *source,
struct nfs_fh *fh,
struct nfs_fattr *fattr,
rpc_authflavor_t flavor)
{
struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor);
if (!IS_ERR(server) && !IS_ERR(source->client_acl))
nfs_init_server_aclclient(server);
return server;
}

965
fs/nfs/nfs3proc.c Normal file
View file

@ -0,0 +1,965 @@
/*
* linux/fs/nfs/nfs3proc.c
*
* Client-side NFSv3 procedures stubs.
*
* Copyright (C) 1997, Olaf Kirch
*/
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
#include <linux/slab.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/lockd/bind.h>
#include <linux/nfs_mount.h>
#include <linux/freezer.h>
#include <linux/xattr.h>
#include "iostat.h"
#include "internal.h"
#include "nfs3_fs.h"
#define NFSDBG_FACILITY NFSDBG_PROC
/* A wrapper to handle the EJUKEBOX error messages */
static int
nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
int res;
do {
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
}
#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
static int
nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
{
if (task->tk_status != -EJUKEBOX)
return 0;
if (task->tk_status == -EJUKEBOX)
nfs_inc_stats(inode, NFSIOS_DELAY);
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
return 1;
}
static int
do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
.rpc_argp = fhandle,
.rpc_resp = info,
};
int status;
dprintk("%s: call fsinfo\n", __func__);
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __func__, status);
if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_resp = info->fattr;
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply getattr: %d\n", __func__, status);
}
return status;
}
/*
* Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
*/
static int
nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int status;
status = do_proc_get_root(server->client, fhandle, info);
if (status && server->nfs_client->cl_rpcclient != server->client)
status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
return status;
}
/*
* One function for each procedure in the NFS protocol.
*/
static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
.rpc_argp = fhandle,
.rpc_resp = fattr,
};
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
static int
nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct iattr *sattr)
{
struct inode *inode = dentry->d_inode;
struct nfs3_sattrargs arg = {
.fh = NFS_FH(inode),
.sattr = sattr,
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
.rpc_argp = &arg,
.rpc_resp = fattr,
};
int status;
dprintk("NFS call setattr\n");
if (sattr->ia_valid & ATTR_FILE)
msg.rpc_cred = nfs_file_cred(sattr->ia_file);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
nfs3_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct nfs3_diropres res = {
.fh = fhandle,
.fattr = fattr
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
.rpc_argp = &arg,
.rpc_resp = &res,
};
int status;
dprintk("NFS call lookup %s\n", name->name);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
return -ENOMEM;
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_refresh_inode(dir, res.dir_attr);
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_argp = fhandle;
msg.rpc_resp = fattr;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
nfs_free_fattr(res.dir_attr);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
};
struct nfs3_accessres res;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = entry->cred,
};
int mode = entry->mask;
int status = -ENOMEM;
dprintk("NFS call access\n");
if (mode & MAY_READ)
arg.access |= NFS3_ACCESS_READ;
if (S_ISDIR(inode->i_mode)) {
if (mode & MAY_WRITE)
arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
if (mode & MAY_EXEC)
arg.access |= NFS3_ACCESS_LOOKUP;
} else {
if (mode & MAY_WRITE)
arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
if (mode & MAY_EXEC)
arg.access |= NFS3_ACCESS_EXECUTE;
}
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
goto out;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, res.fattr);
if (status == 0) {
entry->mask = 0;
if (res.access & NFS3_ACCESS_READ)
entry->mask |= MAY_READ;
if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
entry->mask |= MAY_WRITE;
if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC;
}
nfs_free_fattr(res.fattr);
out:
dprintk("NFS reply access: %d\n", status);
return status;
}
static int nfs3_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
struct nfs_fattr *fattr;
struct nfs3_readlinkargs args = {
.fh = NFS_FH(inode),
.pgbase = pgbase,
.pglen = pglen,
.pages = &page
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
.rpc_argp = &args,
};
int status = -ENOMEM;
dprintk("NFS call readlink\n");
fattr = nfs_alloc_fattr();
if (fattr == NULL)
goto out;
msg.rpc_resp = fattr;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, fattr);
nfs_free_fattr(fattr);
out:
dprintk("NFS reply readlink: %d\n", status);
return status;
}
struct nfs3_createdata {
struct rpc_message msg;
union {
struct nfs3_createargs create;
struct nfs3_mkdirargs mkdir;
struct nfs3_symlinkargs symlink;
struct nfs3_mknodargs mknod;
} arg;
struct nfs3_diropres res;
struct nfs_fh fh;
struct nfs_fattr fattr;
struct nfs_fattr dir_attr;
};
static struct nfs3_createdata *nfs3_alloc_createdata(void)
{
struct nfs3_createdata *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data != NULL) {
data->msg.rpc_argp = &data->arg;
data->msg.rpc_resp = &data->res;
data->res.fh = &data->fh;
data->res.fattr = &data->fattr;
data->res.dir_attr = &data->dir_attr;
nfs_fattr_init(data->res.fattr);
nfs_fattr_init(data->res.dir_attr);
}
return data;
}
static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
{
int status;
status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
nfs_post_op_update_inode(dir, data->res.dir_attr);
if (status == 0)
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
return status;
}
static void nfs3_free_createdata(struct nfs3_createdata *data)
{
kfree(data);
}
/*
* Create a regular file.
*/
static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
int status = -ENOMEM;
dprintk("NFS call create %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE];
data->arg.create.fh = NFS_FH(dir);
data->arg.create.name = dentry->d_name.name;
data->arg.create.len = dentry->d_name.len;
data->arg.create.sattr = sattr;
data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) {
data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE;
data->arg.create.verifier[0] = cpu_to_be32(jiffies);
data->arg.create.verifier[1] = cpu_to_be32(current->pid);
}
status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
if (status)
goto out;
for (;;) {
status = nfs3_do_create(dir, dentry, data);
if (status != -ENOTSUPP)
break;
/* If the server doesn't support the exclusive creation
* semantics, try again with simple 'guarded' mode. */
switch (data->arg.create.createmode) {
case NFS3_CREATE_EXCLUSIVE:
data->arg.create.createmode = NFS3_CREATE_GUARDED;
break;
case NFS3_CREATE_GUARDED:
data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
break;
case NFS3_CREATE_UNCHECKED:
goto out;
}
nfs_fattr_init(data->res.dir_attr);
nfs_fattr_init(data->res.fattr);
}
if (status != 0)
goto out_release_acls;
/* When we created the file with exclusive semantics, make
* sure we set the attributes afterwards. */
if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {
dprintk("NFS call setattr (post-create)\n");
if (!(sattr->ia_valid & ATTR_ATIME_SET))
sattr->ia_valid |= ATTR_ATIME;
if (!(sattr->ia_valid & ATTR_MTIME_SET))
sattr->ia_valid |= ATTR_MTIME;
/* Note: we could use a guarded setattr here, but I'm
* not sure this buys us anything (and I'd have
* to revamp the NFSv3 XDR code) */
status = nfs3_proc_setattr(dentry, data->res.fattr, sattr);
nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
dprintk("NFS reply setattr (post-create): %d\n", status);
if (status != 0)
goto out_release_acls;
}
status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
out_release_acls:
posix_acl_release(acl);
posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply create: %d\n", status);
return status;
}
static int
nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
.name = *name,
};
struct nfs_removeres res;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
int status = -ENOMEM;
dprintk("NFS call remove %s\n", name->name);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
goto out;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, res.dir_attr);
nfs_free_fattr(res.dir_attr);
out:
dprintk("NFS reply remove: %d\n", status);
return status;
}
static void
nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
}
static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
{
rpc_call_start(task);
}
static int
nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
{
struct nfs_removeres *res;
if (nfs3_async_handle_jukebox(task, dir))
return 0;
res = task->tk_msg.rpc_resp;
nfs_post_op_update_inode(dir, res->dir_attr);
return 1;
}
static void
nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
}
static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
{
rpc_call_start(task);
}
static int
nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
struct inode *new_dir)
{
struct nfs_renameres *res;
if (nfs3_async_handle_jukebox(task, old_dir))
return 0;
res = task->tk_msg.rpc_resp;
nfs_post_op_update_inode(old_dir, res->old_fattr);
nfs_post_op_update_inode(new_dir, res->new_fattr);
return 1;
}
static int
nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs3_linkargs arg = {
.fromfh = NFS_FH(inode),
.tofh = NFS_FH(dir),
.toname = name->name,
.tolen = name->len
};
struct nfs3_linkres res;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
.rpc_argp = &arg,
.rpc_resp = &res,
};
int status = -ENOMEM;
dprintk("NFS call link %s\n", name->name);
res.fattr = nfs_alloc_fattr();
res.dir_attr = nfs_alloc_fattr();
if (res.fattr == NULL || res.dir_attr == NULL)
goto out;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_post_op_update_inode(dir, res.dir_attr);
nfs_post_op_update_inode(inode, res.fattr);
out:
nfs_free_fattr(res.dir_attr);
nfs_free_fattr(res.fattr);
dprintk("NFS reply link: %d\n", status);
return status;
}
static int
nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
unsigned int len, struct iattr *sattr)
{
struct nfs3_createdata *data;
int status = -ENOMEM;
if (len > NFS3_MAXPATHLEN)
return -ENAMETOOLONG;
dprintk("NFS call symlink %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK];
data->arg.symlink.fromfh = NFS_FH(dir);
data->arg.symlink.fromname = dentry->d_name.name;
data->arg.symlink.fromlen = dentry->d_name.len;
data->arg.symlink.pages = &page;
data->arg.symlink.pathlen = len;
data->arg.symlink.sattr = sattr;
status = nfs3_do_create(dir, dentry, data);
nfs3_free_createdata(data);
out:
dprintk("NFS reply symlink: %d\n", status);
return status;
}
static int
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
int status = -ENOMEM;
dprintk("NFS call mkdir %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
if (status)
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
data->arg.mkdir.fh = NFS_FH(dir);
data->arg.mkdir.name = dentry->d_name.name;
data->arg.mkdir.len = dentry->d_name.len;
data->arg.mkdir.sattr = sattr;
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
goto out_release_acls;
status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
out_release_acls:
posix_acl_release(acl);
posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mkdir: %d\n", status);
return status;
}
static int
nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
{
struct nfs_fattr *dir_attr;
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
.rpc_argp = &arg,
};
int status = -ENOMEM;
dprintk("NFS call rmdir %s\n", name->name);
dir_attr = nfs_alloc_fattr();
if (dir_attr == NULL)
goto out;
msg.rpc_resp = dir_attr;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, dir_attr);
nfs_free_fattr(dir_attr);
out:
dprintk("NFS reply rmdir: %d\n", status);
return status;
}
/*
* The READDIR implementation is somewhat hackish - we pass the user buffer
* to the encode function, which installs it in the receive iovec.
* The decode function itself doesn't perform any decoding, it just makes
* sure the reply is syntactically correct.
*
* Also note that this implementation handles both plain readdir and
* readdirplus.
*/
static int
nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
__be32 *verf = NFS_I(dir)->cookieverf;
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.verf = {verf[0], verf[1]},
.plus = plus,
.count = count,
.pages = pages
};
struct nfs3_readdirres res = {
.verf = verf,
.plus = plus
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = cred
};
int status = -ENOMEM;
if (plus)
msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
dprintk("NFS call readdir%s %d\n",
plus? "plus" : "", (unsigned int) cookie);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
goto out;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_invalidate_atime(dir);
nfs_refresh_inode(dir, res.dir_attr);
nfs_free_fattr(res.dir_attr);
out:
dprintk("NFS reply readdir%s: %d\n",
plus? "plus" : "", status);
return status;
}
static int
nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
int status = -ENOMEM;
dprintk("NFS call mknod %pd %u:%u\n", dentry,
MAJOR(rdev), MINOR(rdev));
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
if (status)
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD];
data->arg.mknod.fh = NFS_FH(dir);
data->arg.mknod.name = dentry->d_name.name;
data->arg.mknod.len = dentry->d_name.len;
data->arg.mknod.sattr = sattr;
data->arg.mknod.rdev = rdev;
switch (sattr->ia_mode & S_IFMT) {
case S_IFBLK:
data->arg.mknod.type = NF3BLK;
break;
case S_IFCHR:
data->arg.mknod.type = NF3CHR;
break;
case S_IFIFO:
data->arg.mknod.type = NF3FIFO;
break;
case S_IFSOCK:
data->arg.mknod.type = NF3SOCK;
break;
default:
status = -EINVAL;
goto out;
}
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
goto out_release_acls;
status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
out_release_acls:
posix_acl_release(acl);
posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mknod: %d\n", status);
return status;
}
static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
.rpc_argp = fhandle,
.rpc_resp = stat,
};
int status;
dprintk("NFS call fsstat\n");
nfs_fattr_init(stat->fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsstat: %d\n", status);
return status;
}
static int
do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
.rpc_argp = fhandle,
.rpc_resp = info,
};
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
/*
* Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via
* nfs_create_server
*/
static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int status;
status = do_proc_fsinfo(server->client, fhandle, info);
if (status && server->nfs_client->cl_rpcclient != server->client)
status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info);
return status;
}
static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
.rpc_argp = fhandle,
.rpc_resp = info,
};
int status;
dprintk("NFS call pathconf\n");
nfs_fattr_init(info->fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
return 0;
}
static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
rpc_call_start(task);
}
static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
nfs_refresh_inode(data->inode, data->res.fattr);
return 0;
}
static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = file_inode(filp);
return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
{
return 0;
}
static int nfs3_return_delegation(struct inode *inode)
{
nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
.mkdir = nfs_mkdir,
.rmdir = nfs_rmdir,
.mknod = nfs_mknod,
.rename = nfs_rename,
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
.get_acl = nfs3_get_acl,
.set_acl = nfs3_set_acl,
#endif
};
static const struct inode_operations nfs3_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
.get_acl = nfs3_get_acl,
.set_acl = nfs3_set_acl,
#endif
};
const struct nfs_rpc_ops nfs_v3_clientops = {
.version = 3, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs3_dir_inode_operations,
.file_inode_ops = &nfs3_file_inode_operations,
.file_ops = &nfs_file_operations,
.getroot = nfs3_proc_get_root,
.submount = nfs_submount,
.try_mount = nfs_try_mount,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
.access = nfs3_proc_access,
.readlink = nfs3_proc_readlink,
.create = nfs3_proc_create,
.remove = nfs3_proc_remove,
.unlink_setup = nfs3_proc_unlink_setup,
.unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
.unlink_done = nfs3_proc_unlink_done,
.rename_setup = nfs3_proc_rename_setup,
.rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
.rename_done = nfs3_proc_rename_done,
.link = nfs3_proc_link,
.symlink = nfs3_proc_symlink,
.mkdir = nfs3_proc_mkdir,
.rmdir = nfs3_proc_rmdir,
.readdir = nfs3_proc_readdir,
.mknod = nfs3_proc_mknod,
.statfs = nfs3_proc_statfs,
.fsinfo = nfs3_proc_fsinfo,
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
.read_setup = nfs3_proc_read_setup,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
.clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
.return_delegation = nfs3_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
.create_server = nfs3_create_server,
.clone_server = nfs3_clone_server,
};

35
fs/nfs/nfs3super.c Normal file
View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 Netapp, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#include "nfs3_fs.h"
#include "nfs.h"
static struct nfs_subversion nfs_v3 = {
.owner = THIS_MODULE,
.nfs_fs = &nfs_fs_type,
.rpc_vers = &nfs_version3,
.rpc_ops = &nfs_v3_clientops,
.sops = &nfs_sops,
#ifdef CONFIG_NFS_V3_ACL
.xattr = nfs3_xattr_handlers,
#endif
};
static int __init init_nfs_v3(void)
{
register_nfs_version(&nfs_v3);
return 0;
}
static void __exit exit_nfs_v3(void)
{
unregister_nfs_version(&nfs_v3);
}
MODULE_LICENSE("GPL");
module_init(init_nfs_v3);
module_exit(exit_nfs_v3);

2556
fs/nfs/nfs3xdr.c Normal file

File diff suppressed because it is too large Load diff

14
fs/nfs/nfs42.h Normal file
View file

@ -0,0 +1,14 @@
/*
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
/* nfs4.2proc.c */
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];
#endif /* __LINUX_FS_NFS_NFS4_2_H */

69
fs/nfs/nfs42proc.c Normal file
View file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#include <linux/fs.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_xdr.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
{
struct nfs_open_context *open;
struct nfs_lock_context *lock;
int ret;
open = get_nfs_open_context(nfs_file_open_context(file));
lock = nfs_get_lock_context(open);
if (IS_ERR(lock)) {
put_nfs_open_context(open);
return PTR_ERR(lock);
}
ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
nfs_put_lock_context(lock);
put_nfs_open_context(open);
return ret;
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
struct nfs42_seek_args args = {
.sa_fh = NFS_FH(inode),
.sa_offset = offset,
.sa_what = (whence == SEEK_HOLE) ?
NFS4_CONTENT_HOLE : NFS4_CONTENT_DATA,
};
struct nfs42_seek_res res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEEK],
.rpc_argp = &args,
.rpc_resp = &res,
};
struct nfs_server *server = NFS_SERVER(inode);
int status;
if (!(server->caps & NFS_CAP_SEEK))
return -ENOTSUPP;
status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
if (status)
return status;
nfs_wb_all(inode);
status = nfs4_call_sync(server->client, server, &msg,
&args.seq_args, &res.seq_res, 0);
if (status == -ENOTSUPP)
server->caps &= ~NFS_CAP_SEEK;
if (status)
return status;
return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}

98
fs/nfs/nfs42xdr.c Normal file
View file

@ -0,0 +1,98 @@
/*
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
1 /* whence */)
#define decode_seek_maxsz (op_decode_hdr_maxsz + \
1 /* eof */ + \
1 /* whence */ + \
2 /* offset */ + \
2 /* length */)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_seek_maxsz)
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
static void encode_seek(struct xdr_stream *xdr,
struct nfs42_seek_args *args,
struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr);
encode_nfs4_stateid(xdr, &args->sa_stateid);
encode_uint64(xdr, args->sa_offset);
encode_uint32(xdr, args->sa_what);
}
/*
* Encode SEEK request
*/
static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs42_seek_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->sa_fh, &hdr);
encode_seek(xdr, args, &hdr);
encode_nops(&hdr);
}
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
__be32 *p;
status = decode_op_hdr(xdr, OP_SEEK);
if (status)
return status;
p = xdr_inline_decode(xdr, 4 + 8);
if (unlikely(!p))
goto out_overflow;
res->sr_eof = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &res->sr_offset);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
* Decode SEEK request
*/
static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct nfs42_seek_res *res)
{
struct compound_hdr hdr;
int status;
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
status = decode_seek(xdr, res);
out:
return status;
}
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */

519
fs/nfs/nfs4_fs.h Normal file
View file

@ -0,0 +1,519 @@
/*
* linux/fs/nfs/nfs4_fs.h
*
* Copyright (C) 2005 Trond Myklebust
*
* NFSv4-specific filesystem definitions and declarations
*/
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
#if defined(CONFIG_NFS_V4_2)
#define NFS4_MAX_MINOR_VERSION 2
#elif defined(CONFIG_NFS_V4_1)
#define NFS4_MAX_MINOR_VERSION 1
#else
#define NFS4_MAX_MINOR_VERSION 0
#endif
#if IS_ENABLED(CONFIG_NFS_V4)
#define NFS4_MAX_LOOP_ON_RECOVER (10)
#include <linux/seqlock.h>
struct idmap;
enum nfs4_client_state {
NFS4CLNT_MANAGER_RUNNING = 0,
NFS4CLNT_CHECK_LEASE,
NFS4CLNT_LEASE_EXPIRED,
NFS4CLNT_RECLAIM_REBOOT,
NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN,
NFS4CLNT_SESSION_RESET,
NFS4CLNT_LEASE_CONFIRM,
NFS4CLNT_SERVER_SCOPE_MISMATCH,
NFS4CLNT_PURGE_STATE,
NFS4CLNT_BIND_CONN_TO_SESSION,
NFS4CLNT_MOVED,
NFS4CLNT_LEASE_MOVED,
};
#define NFS4_RENEW_TIMEOUT 0x01
#define NFS4_RENEW_DELEGATION_CB 0x02
struct nfs4_minor_version_ops {
u32 minor_version;
unsigned init_caps;
int (*init_client)(struct nfs_client *);
void (*shutdown_client)(struct nfs_client *);
bool (*match_stateid)(const nfs4_stateid *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
const struct nfs4_mig_recovery_ops *mig_recovery_ops;
};
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
ktime_t create_time;
int owner_id;
int flags;
u32 counter;
spinlock_t lock; /* Protects the list */
struct list_head list; /* Defines sequence of RPC calls */
struct rpc_wait_queue wait; /* RPC call delay queue */
};
struct nfs_seqid {
struct nfs_seqid_counter *sequence;
struct list_head list;
struct rpc_task *task;
};
static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
{
if (seqid_mutating_err(-status))
seqid->flags |= NFS_SEQID_CONFIRMED;
}
/*
* NFS4 state_owners and lock_owners are simply labels for ordered
* sequences of RPC calls. Their sole purpose is to provide once-only
* semantics by allowing the server to identify replayed requests.
*/
struct nfs4_state_owner {
struct nfs_server *so_server;
struct list_head so_lru;
unsigned long so_expires;
struct rb_node so_server_node;
struct rpc_cred *so_cred; /* Associated cred */
spinlock_t so_lock;
atomic_t so_count;
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
seqcount_t so_reclaim_seqcount;
struct mutex so_delegreturn_mutex;
};
enum {
NFS_OWNER_RECLAIM_REBOOT,
NFS_OWNER_RECLAIM_NOGRACE
};
#define NFS_LOCK_NEW 0
#define NFS_LOCK_RECLAIM 1
#define NFS_LOCK_EXPIRED 2
/*
* struct nfs4_state maintains the client-side state for a given
* (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
*
* OPEN:
* In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
* we need to know how many files are open for reading or writing on a
* given inode. This information too is stored here.
*
* LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
*/
struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */
#define NFS_LOCK_INITIALIZED 0
#define NFS_LOCK_LOST 1
unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
atomic_t ls_count;
fl_owner_t ls_owner;
};
/* bits for nfs4_state->flags */
enum {
LK_STATE_IN_USE,
NFS_DELEGATED_STATE, /* Current stateid is delegation */
NFS_OPEN_STATE, /* OPEN stateid is set */
NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
};
struct nfs4_state {
struct list_head open_states; /* List of states for the same state_owner */
struct list_head inode_states; /* List of states for the same inode */
struct list_head lock_states; /* List of subservient lock stateids */
struct nfs4_state_owner *owner; /* Pointer to the open owner */
struct inode *inode; /* Pointer to the inode */
unsigned long flags; /* Do we hold any locks? */
spinlock_t state_lock; /* Protects the lock_states list */
seqlock_t seqlock; /* Protects the stateid/open_stateid */
nfs4_stateid stateid; /* Current stateid: may be delegation */
nfs4_stateid open_stateid; /* OPEN stateid */
/* The following 3 fields are protected by owner->so_lock */
unsigned int n_rdonly; /* Number of read-only references */
unsigned int n_wronly; /* Number of write-only references */
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
atomic_t count;
};
struct nfs4_exception {
long timeout;
int retry;
struct nfs4_state *state;
struct inode *inode;
};
struct nfs4_state_recovery_ops {
int owner_flag_bit;
int state_flag_bit;
int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
int (*recover_lock)(struct nfs4_state *, struct file_lock *);
int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *);
int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
struct rpc_cred *);
};
struct nfs4_state_maintenance_ops {
int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned);
struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
};
struct nfs4_mig_recovery_ops {
int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
struct page *, struct rpc_cred *);
int (*fsid_present)(struct inode *, struct rpc_cred *);
};
extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
unsigned, umode_t, int *);
/* super.c */
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
/* nfs4proc.c */
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
extern int nfs4_destroy_clientid(struct nfs_client *clp);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
struct page *page, struct rpc_cred *);
extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
#if defined(CONFIG_NFS_V4_1)
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
{
return server->nfs_client->cl_session;
}
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
struct rpc_task *task);
extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
bool sync);
static inline bool
is_ds_only_client(struct nfs_client *clp)
{
return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
EXCHGID4_FLAG_USE_PNFS_DS;
}
static inline bool
is_ds_client(struct nfs_client *clp)
{
return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
}
static inline bool
_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
struct rpc_clnt **clntp, struct rpc_message *msg)
{
struct rpc_cred *newcred = NULL;
rpc_authflavor_t flavor;
if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
spin_lock(&clp->cl_lock);
if (clp->cl_machine_cred != NULL)
/* don't call get_rpccred on the machine cred -
* a reference will be held for life of clp */
newcred = clp->cl_machine_cred;
spin_unlock(&clp->cl_lock);
msg->rpc_cred = newcred;
flavor = clp->cl_rpcclient->cl_auth->au_flavor;
WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I &&
flavor != RPC_AUTH_GSS_KRB5P);
*clntp = clp->cl_rpcclient;
return true;
}
return false;
}
/*
* Function responsible for determining if an rpc_message should use the
* machine cred under SP4_MACH_CRED and if so switching the credential and
* authflavor (using the nfs_client's rpc_clnt which will be krb5i/p).
* Should be called before rpc_call_sync/rpc_call_async.
*/
static inline void
nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
struct rpc_clnt **clntp, struct rpc_message *msg)
{
_nfs4_state_protect(clp, sp4_mode, clntp, msg);
}
/*
* Special wrapper to nfs4_state_protect for write.
* If WRITE can use machine cred but COMMIT cannot, make sure all writes
* that use machine cred use NFS_FILE_SYNC.
*/
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
hdr->args.stable = NFS_FILE_SYNC;
}
#else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
{
return NULL;
}
static inline bool
is_ds_only_client(struct nfs_client *clp)
{
return false;
}
static inline bool
is_ds_client(struct nfs_client *clp)
{
return false;
}
static inline void
nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
struct rpc_clnt **clntp, struct rpc_message *msg)
{
}
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
}
#endif /* CONFIG_NFS_V4_1 */
extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
extern const u32 nfs4_fattr_bitmap[3];
extern const u32 nfs4_statfs_bitmap[3];
extern const u32 nfs4_pathconf_bitmap[3];
extern const u32 nfs4_fsinfo_bitmap[3];
extern const u32 nfs4_fs_locations_bitmap[3];
void nfs40_shutdown_client(struct nfs_client *);
void nfs41_shutdown_client(struct nfs_client *);
int nfs40_init_client(struct nfs_client *);
int nfs41_init_client(struct nfs_client *);
void nfs4_free_client(struct nfs_client *);
struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs_client *);
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
/* nfs4state.c */
struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
int nfs4_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **);
int nfs40_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, struct rpc_cred *);
#if defined(CONFIG_NFS_V4_1)
int nfs41_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, struct rpc_cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp);
extern void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp);
#else
static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern void nfs4_purge_state_owners(struct nfs_server *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs_inode_find_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct nfs41_server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
fmode_t, const struct nfs_lockowner *);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
extern const nfs4_stateid zero_stateid;
/* nfs4super.c */
struct nfs_mount_info;
extern struct nfs_subversion nfs_v4;
struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *);
extern bool nfs4_disable_idmapping;
extern unsigned short max_session_slots;
extern unsigned short send_implementation_id;
extern bool recover_lost_locks;
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
/* nfs4sysctl.c */
#ifdef CONFIG_SYSCTL
int nfs4_register_sysctl(void);
void nfs4_unregister_sysctl(void);
#else
static inline int nfs4_register_sysctl(void)
{
return 0;
}
static inline void nfs4_unregister_sysctl(void)
{
}
#endif
/* nfs4xdr.c */
extern struct rpc_procinfo nfs4_procedures[];
struct nfs4_mount_data;
/* callback_xdr.c */
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
{
memcpy(dst, src, sizeof(*dst));
}
static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
{
return memcmp(dst, src, sizeof(*dst)) == 0;
}
static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
{
return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
}
static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
{
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
}
static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
{
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
}
#else
#define nfs4_close_state(a, b) do { } while (0)
#define nfs4_close_sync(a, b) do { } while (0)
#define nfs4_state_protect(a, b, c, d) do { } while (0)
#define nfs4_state_protect_write(a, b, c, d) do { } while (0)
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */

1227
fs/nfs/nfs4client.c Normal file

File diff suppressed because it is too large Load diff

160
fs/nfs/nfs4file.c Normal file
View file

@ -0,0 +1,160 @@
/*
* linux/fs/nfs/file.c
*
* Copyright (C) 1992 Rick Sladkey
*/
#include <linux/nfs_fs.h>
#include "internal.h"
#include "fscache.h"
#include "pnfs.h"
#ifdef CONFIG_NFS_V4_2
#include "nfs42.h"
#endif
#define NFSDBG_FACILITY NFSDBG_FILE
static int
nfs4_file_open(struct inode *inode, struct file *filp)
{
struct nfs_open_context *ctx;
struct dentry *dentry = filp->f_path.dentry;
struct dentry *parent = NULL;
struct inode *dir;
unsigned openflags = filp->f_flags;
struct iattr attr;
int opened = 0;
int err;
/*
* If no cached dentry exists or if it's negative, NFSv4 handled the
* opens in ->lookup() or ->create().
*
* We only get this far for a cached positive dentry. We skipped
* revalidation, so handle it here by dropping the dentry and returning
* -EOPENSTALE. The VFS will retry the lookup/create/open.
*/
dprintk("NFS: open file(%pd2)\n", dentry);
if ((openflags & O_ACCMODE) == 3)
openflags--;
/* We can't create new files here */
openflags &= ~(O_CREAT|O_EXCL);
parent = dget_parent(dentry);
dir = parent->d_inode;
ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
goto out;
attr.ia_valid = ATTR_OPEN;
if (openflags & O_TRUNC) {
attr.ia_valid |= ATTR_SIZE;
attr.ia_size = 0;
nfs_wb_all(inode);
}
inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
switch (err) {
case -EPERM:
case -EACCES:
case -EDQUOT:
case -ENOSPC:
case -EROFS:
goto out_put_ctx;
default:
goto out_drop;
}
}
if (inode != dentry->d_inode)
goto out_drop;
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
out_put_ctx:
put_nfs_open_context(ctx);
out:
dput(parent);
return err;
out_drop:
d_drop(dentry);
err = -EOPENSTALE;
goto out_put_ctx;
}
static int
nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct inode *inode = file_inode(file);
do {
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret)
ret = pnfs_layoutcommit_inode(inode, true);
mutex_unlock(&inode->i_mutex);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
* the NFS_CONTEXT_RESEND_WRITES flag
*/
start = 0;
end = LLONG_MAX;
} while (ret == -EAGAIN);
return ret;
}
#ifdef CONFIG_NFS_V4_2
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
switch (whence) {
case SEEK_HOLE:
case SEEK_DATA:
ret = nfs42_proc_llseek(filep, offset, whence);
if (ret != -ENOTSUPP)
return ret;
default:
return nfs_file_llseek(filep, offset, whence);
}
}
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
#ifdef CONFIG_NFS_V4_2
.llseek = nfs4_file_llseek,
#else
.llseek = nfs_file_llseek,
#endif
.read = new_sync_read,
.write = new_sync_write,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs4_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
.fsync = nfs4_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
};

50
fs/nfs/nfs4getroot.c Normal file
View file

@ -0,0 +1,50 @@
/*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe)
{
struct nfs_fsinfo fsinfo;
int ret = -ENOMEM;
dprintk("--> nfs4_get_rootfh()\n");
fsinfo.fattr = nfs_alloc_fattr();
if (fsinfo.fattr == NULL)
goto out;
/* Start by getting the root filehandle from the server */
ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe);
if (ret < 0) {
dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
goto out;
}
if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
ret = -ENOTDIR;
goto out;
}
if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot obtained referral\n");
ret = -EREMOTE;
goto out;
}
memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
out:
nfs_free_fattr(fsinfo.fattr);
dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
return ret;
}

523
fs/nfs/nfs4namespace.c Normal file
View file

@ -0,0 +1,523 @@
/*
* linux/fs/nfs/nfs4namespace.c
*
* Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
* - Modified by David Howells <dhowells@redhat.com>
*
* NFSv4 namespace
*/
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/vfs.h>
#include <linux/inet.h>
#include "internal.h"
#include "nfs4_fs.h"
#include "dns_resolve.h"
#define NFSDBG_FACILITY NFSDBG_VFS
/*
* Convert the NFSv4 pathname components into a standard posix path.
*
* Note that the resulting string will be placed at the end of the buffer
*/
static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
char *buffer, ssize_t buflen)
{
char *end = buffer + buflen;
int n;
*--end = '\0';
buflen--;
n = pathname->ncomponents;
while (--n >= 0) {
const struct nfs4_string *component = &pathname->components[n];
buflen -= component->len + 1;
if (buflen < 0)
goto Elong;
end -= component->len;
memcpy(end, component->data, component->len);
*--end = '/';
}
return end;
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
/*
* return the path component of "<server>:<path>"
* nfspath - the "<server>:<path>" string
* end - one past the last char that could contain "<server>:"
* returns NULL on failure
*/
static char *nfs_path_component(const char *nfspath, const char *end)
{
char *p;
if (*nfspath == '[') {
/* parse [] escaped IPv6 addrs */
p = strchr(nfspath, ']');
if (p != NULL && ++p < end && *p == ':')
return p + 1;
} else {
/* otherwise split on first colon */
p = strchr(nfspath, ':');
if (p != NULL && p < end)
return p + 1;
}
return NULL;
}
/*
* Determine the mount path as a string
*/
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
{
char *limit;
char *path = nfs_path(&limit, dentry, buffer, buflen,
NFS_PATH_CANONICAL);
if (!IS_ERR(path)) {
char *path_component = nfs_path_component(path, limit);
if (path_component)
return path_component;
}
return path;
}
/*
* Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
* believe to be the server path to this dentry
*/
static int nfs4_validate_fspath(struct dentry *dentry,
const struct nfs4_fs_locations *locations,
char *page, char *page2)
{
const char *path, *fs_path;
path = nfs4_path(dentry, page, PAGE_SIZE);
if (IS_ERR(path))
return PTR_ERR(path);
fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
if (IS_ERR(fs_path))
return PTR_ERR(fs_path);
if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
dprintk("%s: path %s does not begin with fsroot %s\n",
__func__, path, fs_path);
return -ENOENT;
}
return 0;
}
static size_t nfs_parse_server_name(char *string, size_t len,
struct sockaddr *sa, size_t salen, struct net *net)
{
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
ret = nfs_dns_resolve_name(net, string, len, sa, salen);
if (ret < 0)
ret = 0;
}
return ret;
}
/**
* nfs_find_best_sec - Find a security mechanism supported locally
* @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
* Return an rpc client that uses the first security mechanism in
* "flavors" that is locally supported. The "flavors" array
* is searched in the order returned from the server, per RFC 3530
* recommendation and each flavor is checked for membership in the
* sec= mount option list if it exists.
*
* Return -EPERM if no matching flavor is found in the array.
*
* Please call rpc_shutdown_client() when you are done with this rpc client.
*
*/
static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
struct nfs_server *server,
struct nfs4_secinfo_flavors *flavors)
{
rpc_authflavor_t pflavor;
struct nfs4_secinfo4 *secinfo;
unsigned int i;
for (i = 0; i < flavors->num_flavors; i++) {
secinfo = &flavors->flavors[i];
switch (secinfo->flavor) {
case RPC_AUTH_NULL:
case RPC_AUTH_UNIX:
case RPC_AUTH_GSS:
pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
&secinfo->flavor_info);
/* does the pseudoflavor match a sec= mount opt? */
if (pflavor != RPC_AUTH_MAXFLAVOR &&
nfs_auth_info_match(&server->auth_info, pflavor)) {
struct rpc_clnt *new;
struct rpc_cred *cred;
/* Cloning creates an rpc_auth for the flavor */
new = rpc_clone_client_set_auth(clnt, pflavor);
if (IS_ERR(new))
continue;
/**
* Check that the user actually can use the
* flavor. This is mostly for RPC_AUTH_GSS
* where cr_init obtains a gss context
*/
cred = rpcauth_lookupcred(new->cl_auth, 0);
if (IS_ERR(cred)) {
rpc_shutdown_client(new);
continue;
}
put_rpccred(cred);
return new;
}
}
}
return ERR_PTR(-EPERM);
}
/**
* nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
* return an rpc_clnt that uses the best available security flavor with
* respect to the secinfo flavor list and the sec= mount options.
*
* @clnt: RPC client to clone
* @inode: directory inode
* @name: lookup name
*
* Please call rpc_shutdown_client() when you are done with this rpc client.
*/
struct rpc_clnt *
nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
struct qstr *name)
{
struct page *page;
struct nfs4_secinfo_flavors *flavors;
struct rpc_clnt *new;
int err;
page = alloc_page(GFP_KERNEL);
if (!page)
return ERR_PTR(-ENOMEM);
flavors = page_address(page);
err = nfs4_proc_secinfo(inode, name, flavors);
if (err < 0) {
new = ERR_PTR(err);
goto out;
}
new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
out:
put_page(page);
return new;
}
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
char *page, char *page2,
const struct nfs4_fs_location *location)
{
const size_t addr_bufsize = sizeof(struct sockaddr_storage);
struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
struct vfsmount *mnt = ERR_PTR(-ENOENT);
char *mnt_path;
unsigned int maxbuflen;
unsigned int s;
mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
if (IS_ERR(mnt_path))
return ERR_CAST(mnt_path);
mountdata->mnt_path = mnt_path;
maxbuflen = mnt_path - 1 - page2;
mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
if (mountdata->addr == NULL)
return ERR_PTR(-ENOMEM);
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
if (buf->len <= 0 || buf->len >= maxbuflen)
continue;
if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
continue;
mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
mountdata->addr, addr_bufsize, net);
if (mountdata->addrlen == 0)
continue;
rpc_set_port(mountdata->addr, NFS_PORT);
memcpy(page2, buf->data, buf->len);
page2[buf->len] = '\0';
mountdata->hostname = page2;
snprintf(page, PAGE_SIZE, "%s:%s",
mountdata->hostname,
mountdata->mnt_path);
mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
if (!IS_ERR(mnt))
break;
}
kfree(mountdata->addr);
return mnt;
}
/**
* nfs_follow_referral - set up mountpoint when hitting a referral on moved error
* @dentry - parent directory
* @locations - array of NFSv4 server location information
*
*/
static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
const struct nfs4_fs_locations *locations)
{
struct vfsmount *mnt = ERR_PTR(-ENOENT);
struct nfs_clone_mount mountdata = {
.sb = dentry->d_sb,
.dentry = dentry,
.authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
};
char *page = NULL, *page2 = NULL;
int loc, error;
if (locations == NULL || locations->nlocations <= 0)
goto out;
dprintk("%s: referral at %pd2\n", __func__, dentry);
page = (char *) __get_free_page(GFP_USER);
if (!page)
goto out;
page2 = (char *) __get_free_page(GFP_USER);
if (!page2)
goto out;
/* Ensure fs path is a prefix of current dentry path */
error = nfs4_validate_fspath(dentry, locations, page, page2);
if (error < 0) {
mnt = ERR_PTR(error);
goto out;
}
for (loc = 0; loc < locations->nlocations; loc++) {
const struct nfs4_fs_location *location = &locations->locations[loc];
if (location == NULL || location->nservers <= 0 ||
location->rootpath.ncomponents == 0)
continue;
mnt = try_location(&mountdata, page, page2, location);
if (!IS_ERR(mnt))
break;
}
out:
free_page((unsigned long) page);
free_page((unsigned long) page2);
dprintk("%s: done\n", __func__);
return mnt;
}
/*
* nfs_do_refmount - handle crossing a referral on server
* @dentry - dentry of referral
*
*/
static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
{
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
struct nfs4_fs_locations *fs_locations = NULL;
struct page *page;
int err;
/* BUG_ON(IS_ROOT(dentry)); */
dprintk("%s: enter\n", __func__);
page = alloc_page(GFP_KERNEL);
if (page == NULL)
goto out;
fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
if (fs_locations == NULL)
goto out_free;
/* Get locations */
mnt = ERR_PTR(-ENOENT);
parent = dget_parent(dentry);
dprintk("%s: getting locations for %pd2\n",
__func__, dentry);
err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
if (err != 0 ||
fs_locations->nlocations <= 0 ||
fs_locations->fs_path.ncomponents <= 0)
goto out_free;
mnt = nfs_follow_referral(dentry, fs_locations);
out_free:
__free_page(page);
kfree(fs_locations);
out:
dprintk("%s: done\n", __func__);
return mnt;
}
struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
struct dentry *parent = dget_parent(dentry);
struct inode *dir = parent->d_inode;
struct qstr *name = &dentry->d_name;
struct rpc_clnt *client;
struct vfsmount *mnt;
/* Look it up again to get its attributes and sec flavor */
client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
dput(parent);
if (IS_ERR(client))
return ERR_CAST(client);
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
mnt = nfs_do_refmount(client, dentry);
goto out;
}
if (client->cl_auth->au_flavor != flavor)
flavor = client->cl_auth->au_flavor;
mnt = nfs_do_submount(dentry, fh, fattr, flavor);
out:
rpc_shutdown_client(client);
return mnt;
}
/*
* Try one location from the fs_locations array.
*
* Returns zero on success, or a negative errno value.
*/
static int nfs4_try_replacing_one_location(struct nfs_server *server,
char *page, char *page2,
const struct nfs4_fs_location *location)
{
const size_t addr_bufsize = sizeof(struct sockaddr_storage);
struct net *net = rpc_net_ns(server->client);
struct sockaddr *sap;
unsigned int s;
size_t salen;
int error;
sap = kmalloc(addr_bufsize, GFP_KERNEL);
if (sap == NULL)
return -ENOMEM;
error = -ENOENT;
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
char *hostname;
if (buf->len <= 0 || buf->len > PAGE_SIZE)
continue;
if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL)
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
sap, addr_bufsize, net);
if (salen == 0)
continue;
rpc_set_port(sap, NFS_PORT);
error = -ENOMEM;
hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
if (hostname == NULL)
break;
error = nfs4_update_server(server, hostname, sap, salen, net);
kfree(hostname);
if (error == 0)
break;
}
kfree(sap);
return error;
}
/**
* nfs4_replace_transport - set up transport to destination server
*
* @server: export being migrated
* @locations: fs_locations array
*
* Returns zero on success, or a negative errno value.
*
* The client tries all the entries in the "locations" array, in the
* order returned by the server, until one works or the end of the
* array is reached.
*/
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations)
{
char *page = NULL, *page2 = NULL;
int loc, error;
error = -ENOENT;
if (locations == NULL || locations->nlocations <= 0)
goto out;
error = -ENOMEM;
page = (char *) __get_free_page(GFP_USER);
if (!page)
goto out;
page2 = (char *) __get_free_page(GFP_USER);
if (!page2)
goto out;
for (loc = 0; loc < locations->nlocations; loc++) {
const struct nfs4_fs_location *location =
&locations->locations[loc];
if (location == NULL || location->nservers <= 0 ||
location->rootpath.ncomponents == 0)
continue;
error = nfs4_try_replacing_one_location(server, page,
page2, location);
if (error == 0)
break;
}
out:
free_page((unsigned long)page);
free_page((unsigned long)page2);
return error;
}

8559
fs/nfs/nfs4proc.c Normal file

File diff suppressed because it is too large Load diff

143
fs/nfs/nfs4renewd.c Normal file
View file

@ -0,0 +1,143 @@
/*
* fs/nfs/nfs4renewd.c
*
* Copyright (c) 2002 The Regents of the University of Michigan.
* All rights reserved.
*
* Kendrick Smith <kmsmith@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Implementation of the NFSv4 "renew daemon", which wakes up periodically to
* send a RENEW, to keep state alive on the server. The daemon is implemented
* as an rpc_task, not a real kernel thread, so it always runs in rpciod's
* context. There is one renewd per nfs_server.
*
*/
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_STATE
void
nfs4_renew_state(struct work_struct *work)
{
const struct nfs4_state_maintenance_ops *ops;
struct nfs_client *clp =
container_of(work, struct nfs_client, cl_renewd.work);
struct rpc_cred *cred;
long lease;
unsigned long last, now;
unsigned renew_flags = 0;
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
goto out;
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
last = clp->cl_last_renewal;
now = jiffies;
/* Are we close to a lease timeout? */
if (time_after(now, last + lease/3))
renew_flags |= NFS4_RENEW_TIMEOUT;
if (nfs_delegations_present(clp))
renew_flags |= NFS4_RENEW_DELEGATION_CB;
if (renew_flags != 0) {
cred = ops->get_state_renewal_cred_locked(clp);
spin_unlock(&clp->cl_lock);
if (cred == NULL) {
if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) {
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
nfs_expire_all_delegations(clp);
} else {
int ret;
/* Queue an asynchronous RENEW. */
ret = ops->sched_state_renewal(clp, cred, renew_flags);
put_rpccred(cred);
switch (ret) {
default:
goto out_exp;
case -EAGAIN:
case -ENOMEM:
break;
}
}
} else {
dprintk("%s: failed to call renewd. Reason: lease not expired \n",
__func__);
spin_unlock(&clp->cl_lock);
}
nfs4_schedule_state_renewal(clp);
out_exp:
nfs_expire_unreferenced_delegations(clp);
out:
dprintk("%s: done\n", __func__);
}
void
nfs4_schedule_state_renewal(struct nfs_client *clp)
{
long timeout;
spin_lock(&clp->cl_lock);
timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
- (long)jiffies;
if (timeout < 5 * HZ)
timeout = 5 * HZ;
dprintk("%s: requeueing work. Lease period = %ld\n",
__func__, (timeout + HZ - 1) / HZ);
mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
spin_unlock(&clp->cl_lock);
}
void
nfs4_kill_renewd(struct nfs_client *clp)
{
cancel_delayed_work_sync(&clp->cl_renewd);
}
/*
* Local variables:
* c-basic-offset: 8
* End:
*/

565
fs/nfs/nfs4session.c Normal file
View file

@ -0,0 +1,565 @@
/*
* fs/nfs/nfs4session.c
*
* Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com>
*
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/module.h>
#include "nfs4_fs.h"
#include "internal.h"
#include "nfs4session.h"
#include "callback.h"
#define NFSDBG_FACILITY NFSDBG_STATE
static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
{
tbl->highest_used_slotid = NFS4_NO_SLOT;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
init_completion(&tbl->complete);
}
/*
* nfs4_shrink_slot_table - free retired slots from the slot table
*/
static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize)
{
struct nfs4_slot **p;
if (newsize >= tbl->max_slots)
return;
p = &tbl->slots;
while (newsize--)
p = &(*p)->next;
while (*p) {
struct nfs4_slot *slot = *p;
*p = slot->next;
kfree(slot);
tbl->max_slots--;
}
}
/**
* nfs4_slot_tbl_drain_complete - wake waiters when drain is complete
* @tbl - controlling slot table
*
*/
void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
{
if (nfs4_slot_tbl_draining(tbl))
complete(&tbl->complete);
}
/*
* nfs4_free_slot - free a slot and efficiently update slot table.
*
* freeing a slot is trivially done by clearing its respective bit
* in the bitmap.
* If the freed slotid equals highest_used_slotid we want to update it
* so that the server would be able to size down the slot table if needed,
* otherwise we know that the highest_used_slotid is still in use.
* When updating highest_used_slotid there may be "holes" in the bitmap
* so we need to scan down from highest_used_slotid to 0 looking for the now
* highest slotid in use.
* If none found, highest_used_slotid is set to NFS4_NO_SLOT.
*
* Must be called while holding tbl->slot_tbl_lock
*/
void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
{
u32 slotid = slot->slot_nr;
/* clear used bit in bitmap */
__clear_bit(slotid, tbl->used_slots);
/* update highest_used_slotid when it is freed */
if (slotid == tbl->highest_used_slotid) {
u32 new_max = find_last_bit(tbl->used_slots, slotid);
if (new_max < slotid)
tbl->highest_used_slotid = new_max;
else {
tbl->highest_used_slotid = NFS4_NO_SLOT;
nfs4_slot_tbl_drain_complete(tbl);
}
}
dprintk("%s: slotid %u highest_used_slotid %u\n", __func__,
slotid, tbl->highest_used_slotid);
}
static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl,
u32 slotid, u32 seq_init, gfp_t gfp_mask)
{
struct nfs4_slot *slot;
slot = kzalloc(sizeof(*slot), gfp_mask);
if (slot) {
slot->table = tbl;
slot->slot_nr = slotid;
slot->seq_nr = seq_init;
}
return slot;
}
static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl,
u32 slotid, u32 seq_init, gfp_t gfp_mask)
{
struct nfs4_slot **p, *slot;
p = &tbl->slots;
for (;;) {
if (*p == NULL) {
*p = nfs4_new_slot(tbl, tbl->max_slots,
seq_init, gfp_mask);
if (*p == NULL)
break;
tbl->max_slots++;
}
slot = *p;
if (slot->slot_nr == slotid)
return slot;
p = &slot->next;
}
return ERR_PTR(-ENOMEM);
}
/*
* nfs4_alloc_slot - efficiently look for a free slot
*
* nfs4_alloc_slot looks for an unset bit in the used_slots bitmap.
* If found, we mark the slot as used, update the highest_used_slotid,
* and respectively set up the sequence operation args.
*
* Note: must be called with under the slot_tbl_lock.
*/
struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
{
struct nfs4_slot *ret = ERR_PTR(-EBUSY);
u32 slotid;
dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
tbl->max_slotid + 1);
slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
if (slotid > tbl->max_slotid)
goto out;
ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
if (IS_ERR(ret))
goto out;
__set_bit(slotid, tbl->used_slots);
if (slotid > tbl->highest_used_slotid ||
tbl->highest_used_slotid == NFS4_NO_SLOT)
tbl->highest_used_slotid = slotid;
ret->generation = tbl->generation;
out:
dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
!IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
return ret;
}
static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl,
u32 max_reqs, u32 ivalue)
{
if (max_reqs <= tbl->max_slots)
return 0;
if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS)))
return 0;
return -ENOMEM;
}
static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
u32 server_highest_slotid,
u32 ivalue)
{
struct nfs4_slot **p;
nfs4_shrink_slot_table(tbl, server_highest_slotid + 1);
p = &tbl->slots;
while (*p) {
(*p)->seq_nr = ivalue;
(*p)->interrupted = 0;
p = &(*p)->next;
}
tbl->highest_used_slotid = NFS4_NO_SLOT;
tbl->target_highest_slotid = server_highest_slotid;
tbl->server_highest_slotid = server_highest_slotid;
tbl->d_target_highest_slotid = 0;
tbl->d2_target_highest_slotid = 0;
tbl->max_slotid = server_highest_slotid;
}
/*
* (re)Initialise a slot table
*/
static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
u32 max_reqs, u32 ivalue)
{
int ret;
dprintk("--> %s: max_reqs=%u, tbl->max_slots %u\n", __func__,
max_reqs, tbl->max_slots);
if (max_reqs > NFS4_MAX_SLOT_TABLE)
max_reqs = NFS4_MAX_SLOT_TABLE;
ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
if (ret)
goto out;
spin_lock(&tbl->slot_tbl_lock);
nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
spin_unlock(&tbl->slot_tbl_lock);
dprintk("%s: tbl=%p slots=%p max_slots=%u\n", __func__,
tbl, tbl->slots, tbl->max_slots);
out:
dprintk("<-- %s: return %d\n", __func__, ret);
return ret;
}
/*
* nfs4_release_slot_table - release all slot table entries
*/
static void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
{
nfs4_shrink_slot_table(tbl, 0);
}
/**
* nfs4_shutdown_slot_table - release resources attached to a slot table
* @tbl: slot table to shut down
*
*/
void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl)
{
nfs4_release_slot_table(tbl);
rpc_destroy_wait_queue(&tbl->slot_tbl_waitq);
}
/**
* nfs4_setup_slot_table - prepare a stand-alone slot table for use
* @tbl: slot table to set up
* @max_reqs: maximum number of requests allowed
* @queue: name to give RPC wait queue
*
* Returns zero on success, or a negative errno.
*/
int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs,
const char *queue)
{
nfs4_init_slot_table(tbl, queue);
return nfs4_realloc_slot_table(tbl, max_reqs, 0);
}
static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
{
struct nfs4_sequence_args *args = task->tk_msg.rpc_argp;
struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
struct nfs4_slot *slot = pslot;
struct nfs4_slot_table *tbl = slot->table;
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
return false;
slot->generation = tbl->generation;
args->sa_slot = slot;
res->sr_timestamp = jiffies;
res->sr_slot = slot;
res->sr_status_flags = 0;
res->sr_status = 1;
return true;
}
static bool __nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
struct nfs4_slot *slot)
{
if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs41_assign_slot, slot))
return true;
return false;
}
bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
struct nfs4_slot *slot)
{
if (slot->slot_nr > tbl->max_slotid)
return false;
return __nfs41_wake_and_assign_slot(tbl, slot);
}
static bool nfs41_try_wake_next_slot_table_entry(struct nfs4_slot_table *tbl)
{
struct nfs4_slot *slot = nfs4_alloc_slot(tbl);
if (!IS_ERR(slot)) {
bool ret = __nfs41_wake_and_assign_slot(tbl, slot);
if (ret)
return ret;
nfs4_free_slot(tbl, slot);
}
return false;
}
void nfs41_wake_slot_table(struct nfs4_slot_table *tbl)
{
for (;;) {
if (!nfs41_try_wake_next_slot_table_entry(tbl))
break;
}
}
#if defined(CONFIG_NFS_V4_1)
static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl,
u32 target_highest_slotid)
{
u32 max_slotid;
max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, target_highest_slotid);
if (max_slotid > tbl->server_highest_slotid)
max_slotid = tbl->server_highest_slotid;
if (max_slotid > tbl->target_highest_slotid)
max_slotid = tbl->target_highest_slotid;
tbl->max_slotid = max_slotid;
nfs41_wake_slot_table(tbl);
}
/* Update the client's idea of target_highest_slotid */
static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
u32 target_highest_slotid)
{
if (tbl->target_highest_slotid == target_highest_slotid)
return;
tbl->target_highest_slotid = target_highest_slotid;
tbl->generation++;
}
void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
u32 target_highest_slotid)
{
spin_lock(&tbl->slot_tbl_lock);
nfs41_set_target_slotid_locked(tbl, target_highest_slotid);
tbl->d_target_highest_slotid = 0;
tbl->d2_target_highest_slotid = 0;
nfs41_set_max_slotid_locked(tbl, target_highest_slotid);
spin_unlock(&tbl->slot_tbl_lock);
}
static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
u32 highest_slotid)
{
if (tbl->server_highest_slotid == highest_slotid)
return;
if (tbl->highest_used_slotid > highest_slotid)
return;
/* Deallocate slots */
nfs4_shrink_slot_table(tbl, highest_slotid + 1);
tbl->server_highest_slotid = highest_slotid;
}
static s32 nfs41_derivative_target_slotid(s32 s1, s32 s2)
{
s1 -= s2;
if (s1 == 0)
return 0;
if (s1 < 0)
return (s1 - 1) >> 1;
return (s1 + 1) >> 1;
}
static int nfs41_sign_s32(s32 s1)
{
if (s1 > 0)
return 1;
if (s1 < 0)
return -1;
return 0;
}
static bool nfs41_same_sign_or_zero_s32(s32 s1, s32 s2)
{
if (!s1 || !s2)
return true;
return nfs41_sign_s32(s1) == nfs41_sign_s32(s2);
}
/* Try to eliminate outliers by checking for sharp changes in the
* derivatives and second derivatives
*/
static bool nfs41_is_outlier_target_slotid(struct nfs4_slot_table *tbl,
u32 new_target)
{
s32 d_target, d2_target;
bool ret = true;
d_target = nfs41_derivative_target_slotid(new_target,
tbl->target_highest_slotid);
d2_target = nfs41_derivative_target_slotid(d_target,
tbl->d_target_highest_slotid);
/* Is first derivative same sign? */
if (nfs41_same_sign_or_zero_s32(d_target, tbl->d_target_highest_slotid))
ret = false;
/* Is second derivative same sign? */
if (nfs41_same_sign_or_zero_s32(d2_target, tbl->d2_target_highest_slotid))
ret = false;
tbl->d_target_highest_slotid = d_target;
tbl->d2_target_highest_slotid = d2_target;
return ret;
}
void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
struct nfs4_slot *slot,
struct nfs4_sequence_res *res)
{
spin_lock(&tbl->slot_tbl_lock);
if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid))
nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
if (tbl->generation == slot->generation)
nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid);
nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid);
spin_unlock(&tbl->slot_tbl_lock);
}
static void nfs4_release_session_slot_tables(struct nfs4_session *session)
{
nfs4_release_slot_table(&session->fc_slot_table);
nfs4_release_slot_table(&session->bc_slot_table);
}
/*
* Initialize or reset the forechannel and backchannel tables
*/
int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
{
struct nfs4_slot_table *tbl;
int status;
dprintk("--> %s\n", __func__);
/* Fore channel */
tbl = &ses->fc_slot_table;
tbl->session = ses;
status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
if (status) /* -ENOMEM */
return status;
/* Back channel */
tbl = &ses->bc_slot_table;
tbl->session = ses;
status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
if (status && tbl->slots == NULL)
/* Fore and back channel share a connection so get
* both slot tables or neither */
nfs4_release_session_slot_tables(ses);
return status;
}
struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
{
struct nfs4_session *session;
session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
if (!session)
return NULL;
nfs4_init_slot_table(&session->fc_slot_table, "ForeChannel Slot table");
nfs4_init_slot_table(&session->bc_slot_table, "BackChannel Slot table");
session->session_state = 1<<NFS4_SESSION_INITING;
session->clp = clp;
return session;
}
static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
{
nfs4_shutdown_slot_table(&session->fc_slot_table);
nfs4_shutdown_slot_table(&session->bc_slot_table);
}
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
struct rpc_cred *cred;
cred = nfs4_get_clid_cred(session->clp);
nfs4_proc_destroy_session(session, cred);
if (cred)
put_rpccred(cred);
rcu_read_lock();
xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
rcu_read_unlock();
dprintk("%s Destroy backchannel for xprt %p\n",
__func__, xprt);
xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
nfs4_destroy_session_slot_tables(session);
kfree(session);
}
/*
* With sessions, the client is not marked ready until after a
* successful EXCHANGE_ID and CREATE_SESSION.
*
* Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
* other versions of NFS can be tried.
*/
static int nfs41_check_session_ready(struct nfs_client *clp)
{
int ret;
if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
ret = nfs4_client_recover_expired_lease(clp);
if (ret)
return ret;
}
if (clp->cl_cons_state < NFS_CS_READY)
return -EPROTONOSUPPORT;
smp_rmb();
return 0;
}
int nfs4_init_session(struct nfs_client *clp)
{
if (!nfs4_has_session(clp))
return 0;
clear_bit(NFS4_SESSION_INITING, &clp->cl_session->session_state);
return nfs41_check_session_ready(clp);
}
int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
{
struct nfs4_session *session = clp->cl_session;
int ret;
spin_lock(&clp->cl_lock);
if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
/*
* Do not set NFS_CS_CHECK_LEASE_TIME instead set the
* DS lease to be equal to the MDS lease.
*/
clp->cl_lease_time = lease_time;
clp->cl_last_renewal = jiffies;
}
spin_unlock(&clp->cl_lock);
ret = nfs41_check_session_ready(clp);
if (ret)
return ret;
/* Test for the DS role */
if (!is_ds_client(clp))
return -ENODEV;
return 0;
}
EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
#endif /* defined(CONFIG_NFS_V4_1) */

153
fs/nfs/nfs4session.h Normal file
View file

@ -0,0 +1,153 @@
/*
* fs/nfs/nfs4session.h
*
* Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com>
*
*/
#ifndef __LINUX_FS_NFS_NFS4SESSION_H
#define __LINUX_FS_NFS_NFS4SESSION_H
/* maximum number of slots to use */
#define NFS4_DEF_SLOT_TABLE_SIZE (64U)
#define NFS4_MAX_SLOT_TABLE (1024U)
#define NFS4_NO_SLOT ((u32)-1)
#if IS_ENABLED(CONFIG_NFS_V4)
/* Sessions slot seqid */
struct nfs4_slot {
struct nfs4_slot_table *table;
struct nfs4_slot *next;
unsigned long generation;
u32 slot_nr;
u32 seq_nr;
unsigned int interrupted : 1;
};
/* Sessions */
enum nfs4_slot_tbl_state {
NFS4_SLOT_TBL_DRAINING,
};
#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
struct nfs4_slot_table {
struct nfs4_session *session; /* Parent session */
struct nfs4_slot *slots; /* seqid per slot */
unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
spinlock_t slot_tbl_lock;
struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */
u32 max_slots; /* # slots in table */
u32 max_slotid; /* Max allowed slotid value */
u32 highest_used_slotid; /* sent to server on each SEQ.
* op for dynamic resizing */
u32 target_highest_slotid; /* Server max_slot target */
u32 server_highest_slotid; /* Server highest slotid */
s32 d_target_highest_slotid; /* Derivative */
s32 d2_target_highest_slotid; /* 2nd derivative */
unsigned long generation; /* Generation counter for
target_highest_slotid */
struct completion complete;
unsigned long slot_tbl_state;
};
/*
* Session related parameters
*/
struct nfs4_session {
struct nfs4_sessionid sess_id;
u32 flags;
unsigned long session_state;
u32 hash_alg;
u32 ssv_len;
/* The fore and back channel */
struct nfs4_channel_attrs fc_attrs;
struct nfs4_slot_table fc_slot_table;
struct nfs4_channel_attrs bc_attrs;
struct nfs4_slot_table bc_slot_table;
struct nfs_client *clp;
};
enum nfs4_session_state {
NFS4_SESSION_INITING,
};
extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
unsigned int max_reqs, const char *queue);
extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
struct nfs4_slot *slot);
void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
{
return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
}
#if defined(CONFIG_NFS_V4_1)
extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
u32 target_highest_slotid);
extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
struct nfs4_slot *slot,
struct nfs4_sequence_res *res);
extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
extern void nfs4_destroy_session(struct nfs4_session *session);
extern int nfs4_init_session(struct nfs_client *clp);
extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
/*
* Determine if sessions are in use.
*/
static inline int nfs4_has_session(const struct nfs_client *clp)
{
if (clp->cl_session)
return 1;
return 0;
}
static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
{
if (nfs4_has_session(clp))
return (clp->cl_session->flags & SESSION4_PERSIST);
return 0;
}
#ifdef CONFIG_CRC32
/*
* nfs_session_id_hash - calculate the crc32 hash for the session id
* @session - pointer to session
*/
#define nfs_session_id_hash(sess_id) \
(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
#else
#define nfs_session_id_hash(session) (0)
#endif
#else /* defined(CONFIG_NFS_V4_1) */
static inline int nfs4_init_session(struct nfs_client *clp)
{
return 0;
}
/*
* Determine if sessions are in use.
*/
static inline int nfs4_has_session(const struct nfs_client *clp)
{
return 0;
}
static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
{
return 0;
}
#endif /* defined(CONFIG_NFS_V4_1) */
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
#endif /* __LINUX_FS_NFS_NFS4SESSION_H */

2428
fs/nfs/nfs4state.c Normal file

File diff suppressed because it is too large Load diff

358
fs/nfs/nfs4super.c Normal file
View file

@ -0,0 +1,358 @@
/*
* Copyright (c) 2012 Bryan Schumaker <bjschuma@netapp.com>
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/nfs_idmap.h>
#include <linux/nfs4_mount.h>
#include <linux/nfs_fs.h>
#include "delegation.h"
#include "internal.h"
#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "pnfs.h"
#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
static void nfs4_evict_inode(struct inode *inode);
static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_remote_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static struct file_system_type nfs4_remote_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_remote_referral_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
struct file_system_type nfs4_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_referral_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs4_write_inode,
.drop_inode = nfs_drop_inode,
.put_super = nfs_put_super,
.statfs = nfs_statfs,
.evict_inode = nfs4_evict_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
.remount_fs = nfs_remount,
};
struct nfs_subversion nfs_v4 = {
.owner = THIS_MODULE,
.nfs_fs = &nfs4_fs_type,
.rpc_vers = &nfs_version4,
.rpc_ops = &nfs_v4_clientops,
.sops = &nfs4_sops,
.xattr = nfs4_xattr_handlers,
};
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
if (ret == 0)
ret = pnfs_layoutcommit_inode(inode,
wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
/*
* Clean out any remaining NFSv4 state that might be left over due
* to open() calls that passed nfs_atomic_lookup, but failed to call
* nfs_open().
*/
static void nfs4_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
}
/*
* Get the superblock for the NFS4 root partition
*/
static struct dentry *
nfs4_remote_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *info)
{
struct nfs_mount_info *mount_info = info;
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
mount_info->set_security = nfs_set_sb_security;
/* Get a volume representation */
server = nfs4_create_server(mount_info, &nfs_v4);
if (IS_ERR(server)) {
mntroot = ERR_CAST(server);
goto out;
}
mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
out:
return mntroot;
}
static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
int flags, void *data, const char *hostname)
{
struct vfsmount *root_mnt;
char *root_devname;
size_t len;
len = strlen(hostname) + 5;
root_devname = kmalloc(len, GFP_KERNEL);
if (root_devname == NULL)
return ERR_PTR(-ENOMEM);
/* Does hostname needs to be enclosed in brackets? */
if (strchr(hostname, ':'))
snprintf(root_devname, len, "[%s]:/", hostname);
else
snprintf(root_devname, len, "%s:/", hostname);
root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
kfree(root_devname);
return root_mnt;
}
struct nfs_referral_count {
struct list_head list;
const struct task_struct *task;
unsigned int referral_count;
};
static LIST_HEAD(nfs_referral_count_list);
static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
static struct nfs_referral_count *nfs_find_referral_count(void)
{
struct nfs_referral_count *p;
list_for_each_entry(p, &nfs_referral_count_list, list) {
if (p->task == current)
return p;
}
return NULL;
}
#define NFS_MAX_NESTED_REFERRALS 2
static int nfs_referral_loop_protect(void)
{
struct nfs_referral_count *p, *new;
int ret = -ENOMEM;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
goto out;
new->task = current;
new->referral_count = 1;
ret = 0;
spin_lock(&nfs_referral_count_list_lock);
p = nfs_find_referral_count();
if (p != NULL) {
if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
ret = -ELOOP;
else
p->referral_count++;
} else {
list_add(&new->list, &nfs_referral_count_list);
new = NULL;
}
spin_unlock(&nfs_referral_count_list_lock);
kfree(new);
out:
return ret;
}
static void nfs_referral_loop_unprotect(void)
{
struct nfs_referral_count *p;
spin_lock(&nfs_referral_count_list_lock);
p = nfs_find_referral_count();
p->referral_count--;
if (p->referral_count == 0)
list_del(&p->list);
else
p = NULL;
spin_unlock(&nfs_referral_count_list_lock);
kfree(p);
}
static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
const char *export_path)
{
struct dentry *dentry;
int err;
if (IS_ERR(root_mnt))
return ERR_CAST(root_mnt);
err = nfs_referral_loop_protect();
if (err) {
mntput(root_mnt);
return ERR_PTR(err);
}
dentry = mount_subtree(root_mnt, export_path);
nfs_referral_loop_unprotect();
return dentry;
}
struct dentry *nfs4_try_mount(int flags, const char *dev_name,
struct nfs_mount_info *mount_info,
struct nfs_subversion *nfs_mod)
{
char *export_path;
struct vfsmount *root_mnt;
struct dentry *res;
struct nfs_parsed_mount_data *data = mount_info->parsed;
dfprintk(MOUNT, "--> nfs4_try_mount()\n");
export_path = data->nfs_server.export_path;
data->nfs_server.export_path = "/";
root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
data->nfs_server.hostname);
data->nfs_server.export_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
PTR_ERR_OR_ZERO(res),
IS_ERR(res) ? " [error]" : "");
return res;
}
static struct dentry *
nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
struct nfs_mount_info mount_info = {
.fill_super = nfs_fill_super,
.set_security = nfs_clone_sb_security,
.cloned = raw_data,
};
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
dprintk("--> nfs4_referral_get_sb()\n");
mount_info.mntfh = nfs_alloc_fhandle();
if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
goto out;
/* create a new volume representation */
server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
if (IS_ERR(server)) {
mntroot = ERR_CAST(server);
goto out;
}
mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
out:
nfs_free_fhandle(mount_info.mntfh);
return mntroot;
}
/*
* Create an NFS4 server record on referral traversal
*/
static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
char *export_path;
struct vfsmount *root_mnt;
struct dentry *res;
dprintk("--> nfs4_referral_mount()\n");
export_path = data->mnt_path;
data->mnt_path = "/";
root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
flags, data, data->hostname);
data->mnt_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
dprintk("<-- nfs4_referral_mount() = %d%s\n",
PTR_ERR_OR_ZERO(res),
IS_ERR(res) ? " [error]" : "");
return res;
}
static int __init init_nfs_v4(void)
{
int err;
err = nfs_dns_resolver_init();
if (err)
goto out;
err = nfs_idmap_init();
if (err)
goto out1;
err = nfs4_register_sysctl();
if (err)
goto out2;
register_nfs_version(&nfs_v4);
return 0;
out2:
nfs_idmap_quit();
out1:
nfs_dns_resolver_destroy();
out:
return err;
}
static void __exit exit_nfs_v4(void)
{
unregister_nfs_version(&nfs_v4);
nfs4_unregister_sysctl();
nfs_idmap_quit();
nfs_dns_resolver_destroy();
}
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);
module_exit(exit_nfs_v4);

69
fs/nfs/nfs4sysctl.c Normal file
View file

@ -0,0 +1,69 @@
/*
* linux/fs/nfs/nfs4sysctl.c
*
* Sysctl interface to NFS v4 parameters
*
* Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/sysctl.h>
#include <linux/nfs_idmap.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "callback.h"
static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
static struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (int *)&nfs_set_port_min,
.extra2 = (int *)&nfs_set_port_max,
},
{
.procname = "idmap_cache_timeout",
.data = &nfs_idmap_cache_timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{ }
};
static struct ctl_table nfs4_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
.child = nfs4_cb_sysctls,
},
{ }
};
static struct ctl_table nfs4_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = nfs4_cb_sysctl_dir,
},
{ }
};
int nfs4_register_sysctl(void)
{
nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root);
if (nfs4_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
}
void nfs4_unregister_sysctl(void)
{
unregister_sysctl_table(nfs4_callback_sysctl_table);
nfs4_callback_sysctl_table = NULL;
}

17
fs/nfs/nfs4trace.c Normal file
View file

@ -0,0 +1,17 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
#include "nfs4session.h"
#include "callback.h"
#define CREATE_TRACE_POINTS
#include "nfs4trace.h"
#ifdef CONFIG_NFS_V4_1
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds);
#endif

1148
fs/nfs/nfs4trace.h Normal file

File diff suppressed because it is too large Load diff

7416
fs/nfs/nfs4xdr.c Normal file

File diff suppressed because it is too large Load diff

309
fs/nfs/nfsroot.c Normal file
View file

@ -0,0 +1,309 @@
/*
* Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de>
*
* Allow an NFS filesystem to be mounted as root. The way this works is:
* (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
* (2) Construct the device string and the options string using DHCP
* option 17 and/or kernel command line options.
* (3) When mount_root() sets up the root file system, pass these strings
* to the NFS client's regular mount interface via sys_mount().
*
*
* Changes:
*
* Alan Cox : Removed get_address name clash with FPU.
* Alan Cox : Reformatted a bit.
* Gero Kuhlmann : Code cleanup
* Michael Rausch : Fixed recognition of an incoming RARP answer.
* Martin Mares : (2.0) Auto-configuration via BOOTP supported.
* Martin Mares : Manual selection of interface & BOOTP/RARP.
* Martin Mares : Using network routes instead of host routes,
* allowing the default configuration to be used
* for normal operation of the host.
* Martin Mares : Randomized timer with exponential backoff
* installed to minimize network congestion.
* Martin Mares : Code cleanup.
* Martin Mares : (2.1) BOOTP and RARP made configuration options.
* Martin Mares : Server hostname generation fixed.
* Gerd Knorr : Fixed wired inode handling
* Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored.
* Martin Mares : RARP replies not tested for server address.
* Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please
* send me your new patches _before_ bothering
* Linus so that I don' always have to cleanup
* _afterwards_ - thanks)
* Gero Kuhlmann : Last changes of Martin Mares undone.
* Gero Kuhlmann : RARP replies are tested for specified server
* again. However, it's now possible to have
* different RARP and NFS servers.
* Gero Kuhlmann : "0.0.0.0" addresses from command line are
* now mapped to INADDR_NONE.
* Gero Kuhlmann : Fixed a bug which prevented BOOTP path name
* from being used (thanks to Leo Spiekman)
* Andy Walker : Allow to specify the NFS server in nfs_root
* without giving a path name
* Swen Thümmler : Allow to specify the NFS options in nfs_root
* without giving a path name. Fix BOOTP request
* for domainname (domainname is NIS domain, not
* DNS domain!). Skip dummy devices for BOOTP.
* Jacek Zapala : Fixed a bug which prevented server-ip address
* from nfsroot parameter from being used.
* Olaf Kirch : Adapted to new NFS code.
* Jakub Jelinek : Free used code segment.
* Marko Kohtala : Fixed some bugs.
* Martin Mares : Debug message cleanup
* Martin Mares : Changed to use the new generic IP layer autoconfig
* code. BOOTP and RARP moved there.
* Martin Mares : Default path now contains host name instead of
* host IP address (but host name defaults to IP
* address anyway).
* Martin Mares : Use root_server_addr appropriately during setup.
* Martin Mares : Rewrote parameter parsing, now hopefully giving
* correct overriding.
* Trond Myklebust : Add in preliminary support for NFSv3 and TCP.
* Fix bug in root_nfs_addr(). nfs_data.namlen
* is NOT for the length of the hostname.
* Hua Qin : Support for mounting root file system via
* NFS over TCP.
* Fabian Frederick: Option parser rebuilt (using parser lib)
* Chuck Lever : Use super.c's text-based mount option parsing
* Chuck Lever : Add "nfsrootdebug".
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/nfs.h>
#include <linux/nfs_fs.h>
#include <linux/utsname.h>
#include <linux/root_dev.h>
#include <net/ipconfig.h>
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_ROOT
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
/* Default NFSROOT mount options. */
#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096"
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
/* Name of directory to mount */
static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
/* server:export path string passed to super.c */
static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
#ifdef NFS_DEBUG
/*
* When the "nfsrootdebug" kernel command line option is specified,
* enable debugging messages for NFSROOT.
*/
static int __init nfs_root_debug(char *__unused)
{
nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT;
return 1;
}
__setup("nfsrootdebug", nfs_root_debug);
#endif
/*
* Parse NFS server and directory information passed on the kernel
* command line.
*
* nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
*
* If there is a "%s" token in the <root-dir> string, it is replaced
* by the ASCII-representation of the client's IP address.
*/
static int __init nfs_root_setup(char *line)
{
ROOT_DEV = Root_NFS;
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
} else {
size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
if (n >= sizeof(nfs_root_parms))
line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
sprintf(nfs_root_parms, NFS_ROOT, line);
}
/*
* Extract the IP address of the NFS server containing our
* root file system, if one was specified.
*
* Note: root_nfs_parse_addr() removes the server-ip from
* nfs_root_parms, if it exists.
*/
root_server_addr = root_nfs_parse_addr(nfs_root_parms);
return 1;
}
__setup("nfsroot=", nfs_root_setup);
static int __init root_nfs_copy(char *dest, const char *src,
const size_t destlen)
{
if (strlcpy(dest, src, destlen) > destlen)
return -1;
return 0;
}
static int __init root_nfs_cat(char *dest, const char *src,
const size_t destlen)
{
size_t len = strlen(dest);
if (len && dest[len - 1] != ',')
if (strlcat(dest, ",", destlen) > destlen)
return -1;
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
}
/*
* Parse out root export path and mount options from
* passed-in string @incoming.
*
* Copy the export path into @exppath.
*/
static int __init root_nfs_parse_options(char *incoming, char *exppath,
const size_t exppathlen)
{
char *p;
/*
* Set the NFS remote path
*/
p = strsep(&incoming, ",");
if (*p != '\0' && strcmp(p, "default") != 0)
if (root_nfs_copy(exppath, p, exppathlen))
return -1;
/*
* @incoming now points to the rest of the string; if it
* contains something, append it to our root options buffer
*/
if (incoming != NULL && *incoming != '\0')
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
return 0;
}
/*
* Decode the export directory path name and NFS options from
* the kernel command line. This has to be done late in order to
* use a dynamically acquired client IP address for the remote
* root directory path.
*
* Returns zero if successful; otherwise -1 is returned.
*/
static int __init root_nfs_data(char *cmdline)
{
char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
tmp = kzalloc(tmplen, GFP_KERNEL);
if (tmp == NULL)
goto out_nomem;
strcpy(tmp, NFS_ROOT);
if (root_server_path[0] != '\0') {
dprintk("Root-NFS: DHCPv4 option 17: %s\n",
root_server_path);
if (root_nfs_parse_options(root_server_path, tmp, tmplen))
goto out_optionstoolong;
}
if (cmdline[0] != '\0') {
dprintk("Root-NFS: nfsroot=%s\n", cmdline);
if (root_nfs_parse_options(cmdline, tmp, tmplen))
goto out_optionstoolong;
}
/*
* Append mandatory options for nfsroot so they override
* what has come before
*/
snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
/*
* Set up nfs_root_device. For NFS mounts, this looks like
*
* server:/path
*
* At this point, utsname()->nodename contains our local
* IP address or hostname, set by ipconfig. If "%s" exists
* in tmp, substitute the nodename, then shovel the whole
* mess into nfs_root_device.
*/
len = snprintf(nfs_export_path, sizeof(nfs_export_path),
tmp, utsname()->nodename);
if (len > (int)sizeof(nfs_export_path))
goto out_devnametoolong;
len = snprintf(nfs_root_device, sizeof(nfs_root_device),
"%pI4:%s", &servaddr, nfs_export_path);
if (len > (int)sizeof(nfs_root_device))
goto out_devnametoolong;
retval = 0;
out:
kfree(tmp);
return retval;
out_nomem:
printk(KERN_ERR "Root-NFS: could not allocate memory\n");
goto out;
out_optionstoolong:
printk(KERN_ERR "Root-NFS: mount options string too long\n");
goto out;
out_devnametoolong:
printk(KERN_ERR "Root-NFS: root device name too long.\n");
goto out;
}
/**
* nfs_root_data - Return prepared 'data' for NFSROOT mount
* @root_device: OUT: address of string containing NFSROOT device
* @root_data: OUT: address of string containing NFSROOT mount options
*
* Returns zero and sets @root_device and @root_data if successful,
* otherwise -1 is returned.
*/
int __init nfs_root_data(char **root_device, char **root_data)
{
servaddr = root_server_addr;
if (servaddr == htonl(INADDR_NONE)) {
printk(KERN_ERR "Root-NFS: no NFS server address\n");
return -1;
}
if (root_nfs_data(nfs_root_parms) < 0)
return -1;
*root_device = nfs_root_device;
*root_data = nfs_root_options;
return 0;
}

9
fs/nfs/nfstrace.c Normal file
View file

@ -0,0 +1,9 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/nfs_fs.h>
#include <linux/namei.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include "nfstrace.h"

730
fs/nfs/nfstrace.h Normal file
View file

@ -0,0 +1,730 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM nfs
#if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NFS_H
#include <linux/tracepoint.h>
#define nfs_show_file_type(ftype) \
__print_symbolic(ftype, \
{ DT_UNKNOWN, "UNKNOWN" }, \
{ DT_FIFO, "FIFO" }, \
{ DT_CHR, "CHR" }, \
{ DT_DIR, "DIR" }, \
{ DT_BLK, "BLK" }, \
{ DT_REG, "REG" }, \
{ DT_LNK, "LNK" }, \
{ DT_SOCK, "SOCK" }, \
{ DT_WHT, "WHT" })
#define nfs_show_cache_validity(v) \
__print_flags(v, "|", \
{ NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \
{ NFS_INO_INVALID_DATA, "INVALID_DATA" }, \
{ NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \
{ NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \
{ NFS_INO_INVALID_ACL, "INVALID_ACL" }, \
{ NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \
{ NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \
{ NFS_INO_INVALID_LABEL, "INVALID_LABEL" })
#define nfs_show_nfsi_flags(v) \
__print_flags(v, "|", \
{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
{ 1 << NFS_INO_STALE, "STALE" }, \
{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
{ 1 << NFS_INO_COMMIT, "COMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
DECLARE_EVENT_CLASS(nfs_inode_event,
TP_PROTO(
const struct inode *inode
),
TP_ARGS(inode),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
__field(u64, version)
),
TP_fast_assign(
const struct nfs_inode *nfsi = NFS_I(inode);
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode->i_version;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(unsigned long long)__entry->version
)
);
DECLARE_EVENT_CLASS(nfs_inode_event_done,
TP_PROTO(
const struct inode *inode,
int error
),
TP_ARGS(inode, error),
TP_STRUCT__entry(
__field(int, error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(unsigned char, type)
__field(u64, fileid)
__field(u64, version)
__field(loff_t, size)
__field(unsigned long, nfsi_flags)
__field(unsigned long, cache_validity)
),
TP_fast_assign(
const struct nfs_inode *nfsi = NFS_I(inode);
__entry->error = error;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->type = nfs_umode_to_dtype(inode->i_mode);
__entry->version = inode->i_version;
__entry->size = i_size_read(inode);
__entry->nfsi_flags = nfsi->flags;
__entry->cache_validity = nfsi->cache_validity;
),
TP_printk(
"error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
"type=%u (%s) version=%llu size=%lld "
"cache_validity=%lu (%s) nfs_flags=%ld (%s)",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->type,
nfs_show_file_type(__entry->type),
(unsigned long long)__entry->version,
(long long)__entry->size,
__entry->cache_validity,
nfs_show_cache_validity(__entry->cache_validity),
__entry->nfsi_flags,
nfs_show_nfsi_flags(__entry->nfsi_flags)
)
);
#define DEFINE_NFS_INODE_EVENT(name) \
DEFINE_EVENT(nfs_inode_event, name, \
TP_PROTO( \
const struct inode *inode \
), \
TP_ARGS(inode))
#define DEFINE_NFS_INODE_EVENT_DONE(name) \
DEFINE_EVENT(nfs_inode_event_done, name, \
TP_PROTO( \
const struct inode *inode, \
int error \
), \
TP_ARGS(inode, error))
DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit);
DEFINE_NFS_INODE_EVENT(nfs_getattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit);
DEFINE_NFS_INODE_EVENT(nfs_setattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit);
DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit);
DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
DEFINE_NFS_INODE_EVENT(nfs_access_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
#define show_lookup_flags(flags) \
__print_flags((unsigned long)flags, "|", \
{ LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
{ LOOKUP_DIRECTORY, "DIRECTORY" }, \
{ LOOKUP_OPEN, "OPEN" }, \
{ LOOKUP_CREATE, "CREATE" }, \
{ LOOKUP_EXCL, "EXCL" })
DECLARE_EVENT_CLASS(nfs_lookup_event,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry,
unsigned int flags
),
TP_ARGS(dir, dentry, flags),
TP_STRUCT__entry(
__field(unsigned int, flags)
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"flags=%u (%s) name=%02x:%02x:%llu/%s",
__entry->flags,
show_lookup_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
#define DEFINE_NFS_LOOKUP_EVENT(name) \
DEFINE_EVENT(nfs_lookup_event, name, \
TP_PROTO( \
const struct inode *dir, \
const struct dentry *dentry, \
unsigned int flags \
), \
TP_ARGS(dir, dentry, flags))
DECLARE_EVENT_CLASS(nfs_lookup_event_done,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry,
unsigned int flags,
int error
),
TP_ARGS(dir, dentry, flags, error),
TP_STRUCT__entry(
__field(int, error)
__field(unsigned int, flags)
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error;
__entry->flags = flags;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
__entry->error,
__entry->flags,
show_lookup_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
#define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \
DEFINE_EVENT(nfs_lookup_event_done, name, \
TP_PROTO( \
const struct inode *dir, \
const struct dentry *dentry, \
unsigned int flags, \
int error \
), \
TP_ARGS(dir, dentry, flags, error))
DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter);
DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
#define show_open_flags(flags) \
__print_flags((unsigned long)flags, "|", \
{ O_CREAT, "O_CREAT" }, \
{ O_EXCL, "O_EXCL" }, \
{ O_TRUNC, "O_TRUNC" }, \
{ O_APPEND, "O_APPEND" }, \
{ O_DSYNC, "O_DSYNC" }, \
{ O_DIRECT, "O_DIRECT" }, \
{ O_DIRECTORY, "O_DIRECTORY" })
#define show_fmode_flags(mode) \
__print_flags(mode, "|", \
{ ((__force unsigned long)FMODE_READ), "READ" }, \
{ ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
{ ((__force unsigned long)FMODE_EXEC), "EXEC" })
TRACE_EVENT(nfs_atomic_open_enter,
TP_PROTO(
const struct inode *dir,
const struct nfs_open_context *ctx,
unsigned int flags
),
TP_ARGS(dir, ctx, flags),
TP_STRUCT__entry(
__field(unsigned int, flags)
__field(unsigned int, fmode)
__field(dev_t, dev)
__field(u64, dir)
__string(name, ctx->dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned int)ctx->mode;
__assign_str(name, ctx->dentry->d_name.name);
),
TP_printk(
"flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s",
__entry->flags,
show_open_flags(__entry->flags),
show_fmode_flags(__entry->fmode),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
TRACE_EVENT(nfs_atomic_open_exit,
TP_PROTO(
const struct inode *dir,
const struct nfs_open_context *ctx,
unsigned int flags,
int error
),
TP_ARGS(dir, ctx, flags, error),
TP_STRUCT__entry(
__field(int, error)
__field(unsigned int, flags)
__field(unsigned int, fmode)
__field(dev_t, dev)
__field(u64, dir)
__string(name, ctx->dentry->d_name.name)
),
TP_fast_assign(
__entry->error = error;
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned int)ctx->mode;
__assign_str(name, ctx->dentry->d_name.name);
),
TP_printk(
"error=%d flags=%u (%s) fmode=%s "
"name=%02x:%02x:%llu/%s",
__entry->error,
__entry->flags,
show_open_flags(__entry->flags),
show_fmode_flags(__entry->fmode),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
TRACE_EVENT(nfs_create_enter,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry,
unsigned int flags
),
TP_ARGS(dir, dentry, flags),
TP_STRUCT__entry(
__field(unsigned int, flags)
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"flags=%u (%s) name=%02x:%02x:%llu/%s",
__entry->flags,
show_open_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
TRACE_EVENT(nfs_create_exit,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry,
unsigned int flags,
int error
),
TP_ARGS(dir, dentry, flags, error),
TP_STRUCT__entry(
__field(int, error)
__field(unsigned int, flags)
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->error = error;
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
__entry->error,
__entry->flags,
show_open_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
DECLARE_EVENT_CLASS(nfs_directory_event,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry
),
TP_ARGS(dir, dentry),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"name=%02x:%02x:%llu/%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
#define DEFINE_NFS_DIRECTORY_EVENT(name) \
DEFINE_EVENT(nfs_directory_event, name, \
TP_PROTO( \
const struct inode *dir, \
const struct dentry *dentry \
), \
TP_ARGS(dir, dentry))
DECLARE_EVENT_CLASS(nfs_directory_event_done,
TP_PROTO(
const struct inode *dir,
const struct dentry *dentry,
int error
),
TP_ARGS(dir, dentry, error),
TP_STRUCT__entry(
__field(int, error)
__field(dev_t, dev)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"error=%d name=%02x:%02x:%llu/%s",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
#define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \
DEFINE_EVENT(nfs_directory_event_done, name, \
TP_PROTO( \
const struct inode *dir, \
const struct dentry *dentry, \
int error \
), \
TP_ARGS(dir, dentry, error))
DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit);
DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit);
DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit);
DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit);
DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit);
DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter);
DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit);
TRACE_EVENT(nfs_link_enter,
TP_PROTO(
const struct inode *inode,
const struct inode *dir,
const struct dentry *dentry
),
TP_ARGS(inode, dir, dentry),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, fileid)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->fileid,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
TRACE_EVENT(nfs_link_exit,
TP_PROTO(
const struct inode *inode,
const struct inode *dir,
const struct dentry *dentry,
int error
),
TP_ARGS(inode, dir, dentry, error),
TP_STRUCT__entry(
__field(int, error)
__field(dev_t, dev)
__field(u64, fileid)
__field(u64, dir)
__string(name, dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
__entry->error = error;
__assign_str(name, dentry->d_name.name);
),
TP_printk(
"error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->fileid,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
DECLARE_EVENT_CLASS(nfs_rename_event,
TP_PROTO(
const struct inode *old_dir,
const struct dentry *old_dentry,
const struct inode *new_dir,
const struct dentry *new_dentry
),
TP_ARGS(old_dir, old_dentry, new_dir, new_dentry),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, old_dir)
__field(u64, new_dir)
__string(old_name, old_dentry->d_name.name)
__string(new_name, new_dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = old_dir->i_sb->s_dev;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
__assign_str(old_name, old_dentry->d_name.name);
__assign_str(new_name, new_dentry->d_name.name);
),
TP_printk(
"old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->old_dir,
__get_str(old_name),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->new_dir,
__get_str(new_name)
)
);
#define DEFINE_NFS_RENAME_EVENT(name) \
DEFINE_EVENT(nfs_rename_event, name, \
TP_PROTO( \
const struct inode *old_dir, \
const struct dentry *old_dentry, \
const struct inode *new_dir, \
const struct dentry *new_dentry \
), \
TP_ARGS(old_dir, old_dentry, new_dir, new_dentry))
DECLARE_EVENT_CLASS(nfs_rename_event_done,
TP_PROTO(
const struct inode *old_dir,
const struct dentry *old_dentry,
const struct inode *new_dir,
const struct dentry *new_dentry,
int error
),
TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, error)
__field(u64, old_dir)
__string(old_name, old_dentry->d_name.name)
__field(u64, new_dir)
__string(new_name, new_dentry->d_name.name)
),
TP_fast_assign(
__entry->dev = old_dir->i_sb->s_dev;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
__entry->error = error;
__assign_str(old_name, old_dentry->d_name.name);
__assign_str(new_name, new_dentry->d_name.name);
),
TP_printk(
"error=%d old_name=%02x:%02x:%llu/%s "
"new_name=%02x:%02x:%llu/%s",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->old_dir,
__get_str(old_name),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->new_dir,
__get_str(new_name)
)
);
#define DEFINE_NFS_RENAME_EVENT_DONE(name) \
DEFINE_EVENT(nfs_rename_event_done, name, \
TP_PROTO( \
const struct inode *old_dir, \
const struct dentry *old_dentry, \
const struct inode *new_dir, \
const struct dentry *new_dentry, \
int error \
), \
TP_ARGS(old_dir, old_dentry, new_dir, \
new_dentry, error))
DEFINE_NFS_RENAME_EVENT(nfs_rename_enter);
DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit);
DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename);
TRACE_EVENT(nfs_sillyrename_unlink,
TP_PROTO(
const struct nfs_unlinkdata *data,
int error
),
TP_ARGS(data, error),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, error)
__field(u64, dir)
__dynamic_array(char, name, data->args.name.len + 1)
),
TP_fast_assign(
struct inode *dir = data->dir;
size_t len = data->args.name.len;
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error;
memcpy(__get_dynamic_array(name),
data->args.name.name, len);
((char *)__get_dynamic_array(name))[len] = 0;
),
TP_printk(
"error=%d name=%02x:%02x:%llu/%s",
__entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
__get_str(name)
)
);
#endif /* _TRACE_NFS_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE nfstrace
/* This part must be outside protection */
#include <trace/define_trace.h>

5
fs/nfs/objlayout/Kbuild Normal file
View file

@ -0,0 +1,5 @@
#
# Makefile for the pNFS Objects Layout Driver kernel module
#
objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o

View file

@ -0,0 +1,673 @@
/*
* pNFS Objects layout implementation over open-osd initiator library
*
* Copyright (C) 2009 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/module.h>
#include <scsi/osd_ore.h>
#include "objlayout.h"
#include "../internal.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
struct objio_dev_ent {
struct nfs4_deviceid_node id_node;
struct ore_dev od;
};
static void
objio_free_deviceid_node(struct nfs4_deviceid_node *d)
{
struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
dprintk("%s: free od=%p\n", __func__, de->od.od);
osduld_put_device(de->od.od);
kfree(de);
}
struct objio_segment {
struct pnfs_layout_segment lseg;
struct ore_layout layout;
struct ore_components oc;
};
static inline struct objio_segment *
OBJIO_LSEG(struct pnfs_layout_segment *lseg)
{
return container_of(lseg, struct objio_segment, lseg);
}
struct objio_state {
/* Generic layer */
struct objlayout_io_res oir;
bool sync;
/*FIXME: Support for extra_bytes at ore_get_rw_state() */
struct ore_io_state *ios;
};
/* Send and wait for a get_device_info of devices in the layout,
then look them up with the osd_initiator library */
struct nfs4_deviceid_node *
objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
gfp_t gfp_flags)
{
struct pnfs_osd_deviceaddr *deviceaddr;
struct objio_dev_ent *ode = NULL;
struct osd_dev *od;
struct osd_dev_info odi;
bool retry_flag = true;
__be32 *p;
int err;
deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
if (!deviceaddr)
return NULL;
p = page_address(pdev->pages[0]);
pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
odi.systemid_len = deviceaddr->oda_systemid.len;
if (odi.systemid_len > sizeof(odi.systemid)) {
dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
__func__, sizeof(odi.systemid));
err = -EINVAL;
goto out;
} else if (odi.systemid_len)
memcpy(odi.systemid, deviceaddr->oda_systemid.data,
odi.systemid_len);
odi.osdname_len = deviceaddr->oda_osdname.len;
odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
if (!odi.osdname_len && !odi.systemid_len) {
dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
__func__);
err = -ENODEV;
goto out;
}
retry_lookup:
od = osduld_info_lookup(&odi);
if (unlikely(IS_ERR(od))) {
err = PTR_ERR(od);
dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
if (err == -ENODEV && retry_flag) {
err = objlayout_autologin(deviceaddr);
if (likely(!err)) {
retry_flag = false;
goto retry_lookup;
}
}
goto out;
}
dprintk("Adding new dev_id(%llx:%llx)\n",
_DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
ode = kzalloc(sizeof(*ode), gfp_flags);
if (!ode) {
dprintk("%s: -ENOMEM od=%p\n", __func__, od);
goto out;
}
nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
kfree(deviceaddr);
ode->od.od = od;
return &ode->id_node;
out:
kfree(deviceaddr);
return NULL;
}
static void copy_single_comp(struct ore_components *oc, unsigned c,
struct pnfs_osd_object_cred *src_comp)
{
struct ore_comp *ocomp = &oc->comps[c];
WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
}
static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
struct objio_segment **pseg)
{
/* This is the in memory structure of the objio_segment
*
* struct __alloc_objio_segment {
* struct objio_segment olseg;
* struct ore_dev *ods[numdevs];
* struct ore_comp comps[numdevs];
* } *aolseg;
* NOTE: The code as above compiles and runs perfectly. It is elegant,
* type safe and compact. At some Past time Linus has decided he does not
* like variable length arrays, For the sake of this principal we uglify
* the code as below.
*/
struct objio_segment *lseg;
size_t lseg_size = sizeof(*lseg) +
numdevs * sizeof(lseg->oc.ods[0]) +
numdevs * sizeof(*lseg->oc.comps);
lseg = kzalloc(lseg_size, gfp_flags);
if (unlikely(!lseg)) {
dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
numdevs, lseg_size);
return -ENOMEM;
}
lseg->oc.numdevs = numdevs;
lseg->oc.single_comp = EC_MULTPLE_COMPS;
lseg->oc.ods = (void *)(lseg + 1);
lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
*pseg = lseg;
return 0;
}
int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct pnfs_layout_hdr *pnfslay,
struct pnfs_layout_range *range,
struct xdr_stream *xdr,
gfp_t gfp_flags)
{
struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
struct objio_segment *objio_seg;
struct pnfs_osd_xdr_decode_layout_iter iter;
struct pnfs_osd_layout layout;
struct pnfs_osd_object_cred src_comp;
unsigned cur_comp;
int err;
err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
if (unlikely(err))
return err;
err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
if (unlikely(err))
return err;
objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
objio_seg->layout.group_width = layout.olo_map.odm_group_width;
objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
err = ore_verify_layout(layout.olo_map.odm_num_comps,
&objio_seg->layout);
if (unlikely(err))
goto err;
objio_seg->oc.first_dev = layout.olo_comps_index;
cur_comp = 0;
while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
struct nfs4_deviceid_node *d;
struct objio_dev_ent *ode;
copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
d = nfs4_find_get_deviceid(server,
&src_comp.oc_object_id.oid_device_id,
pnfslay->plh_lc_cred, gfp_flags);
if (!d) {
err = -ENXIO;
goto err;
}
ode = container_of(d, struct objio_dev_ent, id_node);
objio_seg->oc.ods[cur_comp++] = &ode->od;
}
/* pnfs_osd_xdr_decode_layout_comp returns false on error */
if (unlikely(err))
goto err;
*outp = &objio_seg->lseg;
return 0;
err:
kfree(objio_seg);
dprintk("%s: Error: return %d\n", __func__, err);
*outp = NULL;
return err;
}
void objio_free_lseg(struct pnfs_layout_segment *lseg)
{
int i;
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
for (i = 0; i < objio_seg->oc.numdevs; i++) {
struct ore_dev *od = objio_seg->oc.ods[i];
struct objio_dev_ent *ode;
if (!od)
break;
ode = container_of(od, typeof(*ode), od);
nfs4_put_deviceid_node(&ode->id_node);
}
kfree(objio_seg);
}
static int
objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
struct objio_state **outp)
{
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
struct ore_io_state *ios;
int ret;
struct __alloc_objio_state {
struct objio_state objios;
struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
} *aos;
aos = kzalloc(sizeof(*aos), gfp_flags);
if (unlikely(!aos))
return -ENOMEM;
objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
aos->ioerrs, rpcdata, pnfs_layout_type);
ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
offset, count, &ios);
if (unlikely(ret)) {
kfree(aos);
return ret;
}
ios->pages = pages;
ios->pgbase = pgbase;
ios->private = aos;
BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
aos->objios.sync = 0;
aos->objios.ios = ios;
*outp = &aos->objios;
return 0;
}
void objio_free_result(struct objlayout_io_res *oir)
{
struct objio_state *objios = container_of(oir, struct objio_state, oir);
ore_put_io_state(objios->ios);
kfree(objios);
}
static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
{
switch (oep) {
case OSD_ERR_PRI_NO_ERROR:
return (enum pnfs_osd_errno)0;
case OSD_ERR_PRI_CLEAR_PAGES:
BUG_ON(1);
return 0;
case OSD_ERR_PRI_RESOURCE:
return PNFS_OSD_ERR_RESOURCE;
case OSD_ERR_PRI_BAD_CRED:
return PNFS_OSD_ERR_BAD_CRED;
case OSD_ERR_PRI_NO_ACCESS:
return PNFS_OSD_ERR_NO_ACCESS;
case OSD_ERR_PRI_UNREACHABLE:
return PNFS_OSD_ERR_UNREACHABLE;
case OSD_ERR_PRI_NOT_FOUND:
return PNFS_OSD_ERR_NOT_FOUND;
case OSD_ERR_PRI_NO_SPACE:
return PNFS_OSD_ERR_NO_SPACE;
default:
WARN_ON(1);
/* fallthrough */
case OSD_ERR_PRI_EIO:
return PNFS_OSD_ERR_EIO;
}
}
static void __on_dev_error(struct ore_io_state *ios,
struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
u64 dev_offset, u64 dev_len)
{
struct objio_state *objios = ios->private;
struct pnfs_osd_objid pooid;
struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
/* FIXME: what to do with more-then-one-group layouts. We need to
* translate from ore_io_state index to oc->comps index
*/
unsigned comp = dev_index;
pooid.oid_device_id = ode->id_node.deviceid;
pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
pooid.oid_object_id = ios->oc->comps[comp].obj.id;
objlayout_io_set_result(&objios->oir, comp,
&pooid, osd_pri_2_pnfs_err(oep),
dev_offset, dev_len, !ios->reading);
}
/*
* read
*/
static void _read_done(struct ore_io_state *ios, void *private)
{
struct objio_state *objios = private;
ssize_t status;
int ret = ore_check_io(ios, &__on_dev_error);
/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
if (likely(!ret))
status = ios->length;
else
status = ret;
objlayout_read_done(&objios->oir, status, objios->sync);
}
int objio_read_pagelist(struct nfs_pgio_header *hdr)
{
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
hdr->lseg, hdr->args.pages, hdr->args.pgbase,
hdr->args.offset, hdr->args.count, hdr,
GFP_KERNEL, &objios);
if (unlikely(ret))
return ret;
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
hdr->args.offset, hdr->args.count);
ret = ore_read(objios->ios);
if (unlikely(ret))
objio_free_result(&objios->oir);
return ret;
}
/*
* write
*/
static void _write_done(struct ore_io_state *ios, void *private)
{
struct objio_state *objios = private;
ssize_t status;
int ret = ore_check_io(ios, &__on_dev_error);
/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
if (likely(!ret)) {
/* FIXME: should be based on the OSD's persistence model
* See OSD2r05 Section 4.13 Data persistence model */
objios->oir.committed = NFS_FILE_SYNC;
status = ios->length;
} else {
status = ret;
}
objlayout_write_done(&objios->oir, status, objios->sync);
}
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
struct nfs_pgio_header *hdr = objios->oir.rpcdata;
struct address_space *mapping = hdr->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
loff_t i_size = i_size_read(hdr->inode);
if (offset >= i_size) {
*uptodate = true;
dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
return ZERO_PAGE(0);
}
page = find_get_page(mapping, index);
if (!page) {
page = find_or_create_page(mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s: grab_cache_page Failed index=0x%lx\n",
__func__, index);
return NULL;
}
unlock_page(page);
}
if (PageDirty(page) || PageWriteback(page))
*uptodate = true;
else
*uptodate = PageUptodate(page);
dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
return page;
}
static void __r4w_put_page(void *priv, struct page *page)
{
dprintk("%s: index=0x%lx\n", __func__,
(page == ZERO_PAGE(0)) ? -1UL : page->index);
if (ZERO_PAGE(0) != page)
page_cache_release(page);
return;
}
static const struct _ore_r4w_op _r4w_op = {
.get_page = &__r4w_get_page,
.put_page = &__r4w_put_page,
};
int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
hdr->lseg, hdr->args.pages, hdr->args.pgbase,
hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
&objios);
if (unlikely(ret))
return ret;
objios->sync = 0 != (how & FLUSH_SYNC);
objios->ios->r4w = &_r4w_op;
if (!objios->sync)
objios->ios->done = _write_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
hdr->args.offset, hdr->args.count);
ret = ore_write(objios->ios);
if (unlikely(ret)) {
objio_free_result(&objios->oir);
return ret;
}
if (objios->sync)
_write_done(objios->ios, objios);
return 0;
}
/*
* Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
* of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req)
{
unsigned int size;
size = pnfs_generic_pg_test(pgio, prev, req);
if (!size || pgio->pg_count + req->wb_bytes >
(unsigned long)pgio->pg_layout_private)
return 0;
return min(size, req->wb_bytes);
}
static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
pnfs_generic_pg_init_read(pgio, req);
if (unlikely(pgio->pg_lseg == NULL))
return; /* Not pNFS */
pgio->pg_layout_private = (void *)
OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
}
static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
unsigned long *stripe_end)
{
u32 stripe_off;
unsigned stripe_size;
if (layout->raid_algorithm == PNFS_OSD_RAID_0)
return true;
stripe_size = layout->stripe_unit *
(layout->group_width - layout->parity);
div_u64_rem(offset, stripe_size, &stripe_off);
if (!stripe_off)
return true;
*stripe_end = stripe_size - stripe_off;
return false;
}
static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
unsigned long stripe_end = 0;
u64 wb_size;
if (pgio->pg_dreq == NULL)
wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pnfs_generic_pg_init_write(pgio, req, wb_size);
if (unlikely(pgio->pg_lseg == NULL))
return; /* Not pNFS */
if (req->wb_offset ||
!aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
&OBJIO_LSEG(pgio->pg_lseg)->layout,
&stripe_end)) {
pgio->pg_layout_private = (void *)stripe_end;
} else {
pgio->pg_layout_private = (void *)
OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
}
}
static const struct nfs_pageio_ops objio_pg_read_ops = {
.pg_init = objio_init_read,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops objio_pg_write_ops = {
.pg_init = objio_init_write,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR,
.max_deviceinfo_size = PAGE_SIZE,
.owner = THIS_MODULE,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
.alloc_lseg = objlayout_alloc_lseg,
.free_lseg = objlayout_free_lseg,
.read_pagelist = objlayout_read_pagelist,
.write_pagelist = objlayout_write_pagelist,
.pg_read_ops = &objio_pg_read_ops,
.pg_write_ops = &objio_pg_write_ops,
.free_deviceid_node = objio_free_deviceid_node,
.encode_layoutcommit = objlayout_encode_layoutcommit,
.encode_layoutreturn = objlayout_encode_layoutreturn,
};
MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
MODULE_LICENSE("GPL");
static int __init
objlayout_init(void)
{
int ret = pnfs_register_layoutdriver(&objlayout_type);
if (ret)
printk(KERN_INFO
"NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
__func__, ret);
else
printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
__func__);
return ret;
}
static void __exit
objlayout_exit(void)
{
pnfs_unregister_layoutdriver(&objlayout_type);
printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
__func__);
}
MODULE_ALIAS("nfs-layouttype4-2");
module_init(objlayout_init);
module_exit(objlayout_exit);

View file

@ -0,0 +1,706 @@
/*
* pNFS Objects layout driver high level definitions
*
* Copyright (C) 2007 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/kmod.h>
#include <linux/moduleparam.h>
#include <linux/ratelimit.h>
#include <scsi/osd_initiator.h>
#include "objlayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* Create a objlayout layout structure for the given inode and return it.
*/
struct pnfs_layout_hdr *
objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
struct objlayout *objlay;
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
if (!objlay)
return NULL;
spin_lock_init(&objlay->lock);
INIT_LIST_HEAD(&objlay->err_list);
dprintk("%s: Return %p\n", __func__, objlay);
return &objlay->pnfs_layout;
}
/*
* Free an objlayout layout structure
*/
void
objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct objlayout *objlay = OBJLAYOUT(lo);
dprintk("%s: objlay %p\n", __func__, objlay);
WARN_ON(!list_empty(&objlay->err_list));
kfree(objlay);
}
/*
* Unmarshall layout and store it in pnfslay.
*/
struct pnfs_layout_segment *
objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
struct nfs4_layoutget_res *lgr,
gfp_t gfp_flags)
{
int status = -ENOMEM;
struct xdr_stream stream;
struct xdr_buf buf = {
.pages = lgr->layoutp->pages,
.page_len = lgr->layoutp->len,
.buflen = lgr->layoutp->len,
.len = lgr->layoutp->len,
};
struct page *scratch;
struct pnfs_layout_segment *lseg;
dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
scratch = alloc_page(gfp_flags);
if (!scratch)
goto err_nofree;
xdr_init_decode(&stream, &buf, NULL);
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
if (unlikely(status)) {
dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
status);
goto err;
}
__free_page(scratch);
dprintk("%s: Return %p\n", __func__, lseg);
return lseg;
err:
__free_page(scratch);
err_nofree:
dprintk("%s: Err Return=>%d\n", __func__, status);
return ERR_PTR(status);
}
/*
* Free a layout segement
*/
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
dprintk("%s: freeing layout segment %p\n", __func__, lseg);
if (unlikely(!lseg))
return;
objio_free_lseg(lseg);
}
/*
* I/O Operations
*/
static inline u64
end_offset(u64 start, u64 len)
{
u64 end;
end = start + len;
return end >= start ? end : NFS4_MAX_UINT64;
}
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
struct page ***p_pages, unsigned *p_pgbase,
u64 offset, unsigned long count)
{
u64 lseg_end_offset;
BUG_ON(offset < lseg->pls_range.offset);
lseg_end_offset = end_offset(lseg->pls_range.offset,
lseg->pls_range.length);
BUG_ON(offset >= lseg_end_offset);
WARN_ON(offset + count > lseg_end_offset);
if (*p_pgbase > PAGE_SIZE) {
dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
*p_pages += *p_pgbase >> PAGE_SHIFT;
*p_pgbase &= ~PAGE_MASK;
}
}
/*
* I/O done common code
*/
static void
objlayout_iodone(struct objlayout_io_res *oir)
{
if (likely(oir->status >= 0)) {
objio_free_result(oir);
} else {
struct objlayout *objlay = oir->objlay;
spin_lock(&objlay->lock);
objlay->delta_space_valid = OBJ_DSU_INVALID;
list_add(&objlay->err_list, &oir->err_list);
spin_unlock(&objlay->lock);
}
}
/*
* objlayout_io_set_result - Set an osd_error code on a specific osd comp.
*
* The @index component IO failed (error returned from target). Register
* the error for later reporting at layout-return.
*/
void
objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
struct pnfs_osd_objid *pooid, int osd_error,
u64 offset, u64 length, bool is_write)
{
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
BUG_ON(index >= oir->num_comps);
if (osd_error) {
ioerr->oer_component = *pooid;
ioerr->oer_comp_offset = offset;
ioerr->oer_comp_length = length;
ioerr->oer_iswrite = is_write;
ioerr->oer_errno = osd_error;
dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
__func__, index, ioerr->oer_errno,
ioerr->oer_iswrite,
_DEVID_LO(&ioerr->oer_component.oid_device_id),
_DEVID_HI(&ioerr->oer_component.oid_device_id),
ioerr->oer_component.oid_partition_id,
ioerr->oer_component.oid_object_id,
ioerr->oer_comp_offset,
ioerr->oer_comp_length);
} else {
/* User need not call if no error is reported */
ioerr->oer_errno = 0;
}
}
/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
* This is because the osd completion is called with ints-off from
* the block layer
*/
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_read_done(hdr);
}
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
struct nfs_pgio_header *hdr = oir->rpcdata;
oir->status = hdr->task.tk_status = status;
if (status >= 0)
hdr->res.count = status;
else
hdr->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
status, hdr->res.eof, sync);
if (sync)
pnfs_ld_read_done(hdr);
else {
INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
schedule_work(&hdr->task.u.tk_work);
}
}
/*
* Perform sync or async reads.
*/
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
loff_t offset = hdr->args.offset;
size_t count = hdr->args.count;
int err;
loff_t eof;
eof = i_size_read(inode);
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
hdr->res.count = 0;
hdr->res.eof = 1;
/*FIXME: do we need to call pnfs_ld_read_done() */
goto out;
}
count = eof - offset;
}
hdr->res.eof = (offset + count) >= eof;
_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
&hdr->args.pgbase,
hdr->args.offset, hdr->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
__func__, inode->i_ino, offset, count, hdr->res.eof);
err = objio_read_pagelist(hdr);
out:
if (unlikely(err)) {
hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
return PNFS_ATTEMPTED;
}
/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
* This is because the osd completion is called with ints-off from
* the block layer
*/
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_write_done(hdr);
}
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
struct nfs_pgio_header *hdr = oir->rpcdata;
oir->status = hdr->task.tk_status = status;
if (status >= 0) {
hdr->res.count = status;
hdr->verf.committed = oir->committed;
} else {
hdr->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
status, hdr->verf.committed, sync);
if (sync)
pnfs_ld_write_done(hdr);
else {
INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
schedule_work(&hdr->task.u.tk_work);
}
}
/*
* Perform sync or async writes.
*/
enum pnfs_try_status
objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
int err;
_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
&hdr->args.pgbase,
hdr->args.offset, hdr->args.count);
err = objio_write_pagelist(hdr, how);
if (unlikely(err)) {
hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
return PNFS_ATTEMPTED;
}
void
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args)
{
struct objlayout *objlay = OBJLAYOUT(pnfslay);
struct pnfs_osd_layoutupdate lou;
__be32 *start;
dprintk("%s: Begin\n", __func__);
spin_lock(&objlay->lock);
lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
lou.dsu_delta = objlay->delta_space_used;
objlay->delta_space_used = 0;
objlay->delta_space_valid = OBJ_DSU_INIT;
lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
spin_unlock(&objlay->lock);
start = xdr_reserve_space(xdr, 4);
BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
*start = cpu_to_be32((xdr->p - start - 1) * 4);
dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
lou.dsu_delta, lou.olu_ioerr_flag);
}
static int
err_prio(u32 oer_errno)
{
switch (oer_errno) {
case 0:
return 0;
case PNFS_OSD_ERR_RESOURCE:
return OSD_ERR_PRI_RESOURCE;
case PNFS_OSD_ERR_BAD_CRED:
return OSD_ERR_PRI_BAD_CRED;
case PNFS_OSD_ERR_NO_ACCESS:
return OSD_ERR_PRI_NO_ACCESS;
case PNFS_OSD_ERR_UNREACHABLE:
return OSD_ERR_PRI_UNREACHABLE;
case PNFS_OSD_ERR_NOT_FOUND:
return OSD_ERR_PRI_NOT_FOUND;
case PNFS_OSD_ERR_NO_SPACE:
return OSD_ERR_PRI_NO_SPACE;
default:
WARN_ON(1);
/* fallthrough */
case PNFS_OSD_ERR_EIO:
return OSD_ERR_PRI_EIO;
}
}
static void
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
const struct pnfs_osd_ioerr *src_err)
{
u64 dest_end, src_end;
if (!dest_err->oer_errno) {
*dest_err = *src_err;
/* accumulated device must be blank */
memset(&dest_err->oer_component.oid_device_id, 0,
sizeof(dest_err->oer_component.oid_device_id));
return;
}
if (dest_err->oer_component.oid_partition_id !=
src_err->oer_component.oid_partition_id)
dest_err->oer_component.oid_partition_id = 0;
if (dest_err->oer_component.oid_object_id !=
src_err->oer_component.oid_object_id)
dest_err->oer_component.oid_object_id = 0;
if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
dest_err->oer_comp_offset = src_err->oer_comp_offset;
dest_end = end_offset(dest_err->oer_comp_offset,
dest_err->oer_comp_length);
src_end = end_offset(src_err->oer_comp_offset,
src_err->oer_comp_length);
if (dest_end < src_end)
dest_end = src_end;
dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
(err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
dest_err->oer_errno = src_err->oer_errno;
} else if (src_err->oer_iswrite) {
dest_err->oer_iswrite = true;
dest_err->oer_errno = src_err->oer_errno;
}
}
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
struct objlayout_io_res *oir, *tmp;
struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
unsigned i;
for (i = 0; i < oir->num_comps; i++) {
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
if (!ioerr->oer_errno)
continue;
printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
"is_write=%d dev(%llx:%llx) par=0x%llx "
"obj=0x%llx offset=0x%llx length=0x%llx\n",
__func__, i, ioerr->oer_errno,
ioerr->oer_iswrite,
_DEVID_LO(&ioerr->oer_component.oid_device_id),
_DEVID_HI(&ioerr->oer_component.oid_device_id),
ioerr->oer_component.oid_partition_id,
ioerr->oer_component.oid_object_id,
ioerr->oer_comp_offset,
ioerr->oer_comp_length);
merge_ioerr(&accumulated_err, ioerr);
}
list_del(&oir->err_list);
objio_free_result(oir);
}
pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
}
void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
struct xdr_stream *xdr,
const struct nfs4_layoutreturn_args *args)
{
struct objlayout *objlay = OBJLAYOUT(pnfslay);
struct objlayout_io_res *oir, *tmp;
__be32 *start;
dprintk("%s: Begin\n", __func__);
start = xdr_reserve_space(xdr, 4);
BUG_ON(!start);
spin_lock(&objlay->lock);
list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
__be32 *last_xdr = NULL, *p;
unsigned i;
int res = 0;
for (i = 0; i < oir->num_comps; i++) {
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
if (!ioerr->oer_errno)
continue;
dprintk("%s: err[%d]: errno=%d is_write=%d "
"dev(%llx:%llx) par=0x%llx obj=0x%llx "
"offset=0x%llx length=0x%llx\n",
__func__, i, ioerr->oer_errno,
ioerr->oer_iswrite,
_DEVID_LO(&ioerr->oer_component.oid_device_id),
_DEVID_HI(&ioerr->oer_component.oid_device_id),
ioerr->oer_component.oid_partition_id,
ioerr->oer_component.oid_object_id,
ioerr->oer_comp_offset,
ioerr->oer_comp_length);
p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
if (unlikely(!p)) {
res = -E2BIG;
break; /* accumulated_error */
}
last_xdr = p;
pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
}
/* TODO: use xdr_write_pages */
if (unlikely(res)) {
/* no space for even one error descriptor */
BUG_ON(!last_xdr);
/* we've encountered a situation with lots and lots of
* errors and no space to encode them all. Use the last
* available slot to report the union of all the
* remaining errors.
*/
encode_accumulated_error(objlay, last_xdr);
goto loop_done;
}
list_del(&oir->err_list);
objio_free_result(oir);
}
loop_done:
spin_unlock(&objlay->lock);
*start = cpu_to_be32((xdr->p - start - 1) * 4);
dprintk("%s: Return\n", __func__);
}
enum {
OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
OSD_LOGIN_UPCALL_PATHLEN = 256
};
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
struct __auto_login {
char uri[OBJLAYOUT_MAX_URI_LEN];
char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
};
static int __objlayout_upcall(struct __auto_login *login)
{
static char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL
};
char *argv[8];
int ret;
if (unlikely(!osd_login_prog[0])) {
dprintk("%s: osd_login_prog is disabled\n", __func__);
return -EACCES;
}
dprintk("%s uri: %s\n", __func__, login->uri);
dprintk("%s osdname %s\n", __func__, login->osdname);
dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
argv[0] = (char *)osd_login_prog;
argv[1] = "-u";
argv[2] = login->uri;
argv[3] = "-o";
argv[4] = login->osdname;
argv[5] = "-s";
argv[6] = login->systemid_hex;
argv[7] = NULL;
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
/*
* Disable the upcall mechanism if we're getting an ENOENT or
* EACCES error. The admin can re-enable it on the fly by using
* sysfs to set the objlayoutdriver.osd_login_prog module parameter once
* the problem has been fixed.
*/
if (ret == -ENOENT || ret == -EACCES) {
printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
"objlayoutdriver.osd_login_prog kernel parameter!\n",
osd_login_prog);
osd_login_prog[0] = '\0';
}
dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
return ret;
}
/* Assume dest is all zeros */
static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
char *dest, int max_len,
const char *var_name)
{
if (!s.len)
return;
if (s.len >= max_len) {
pr_warn_ratelimited(
"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
var_name, s.len, max_len);
s.len = max_len - 1; /* space for null terminator */
}
memcpy(dest, s.data, s.len);
}
/* Assume sysid is all zeros */
static void _sysid_2_hex(struct nfs4_string s,
char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
{
int i;
char *cur;
if (!s.len)
return;
if (s.len != OSD_SYSTEMID_LEN) {
pr_warn_ratelimited(
"objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
s.len);
if (s.len > OSD_SYSTEMID_LEN)
s.len = OSD_SYSTEMID_LEN;
}
cur = sysid;
for (i = 0; i < s.len; i++)
cur = hex_byte_pack(cur, s.data[i]);
}
int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
{
int rc;
struct __auto_login login;
if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
return -ENODEV;
memset(&login, 0, sizeof(login));
__copy_nfsS_and_zero_terminate(
deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
login.uri, sizeof(login.uri), "URI");
__copy_nfsS_and_zero_terminate(
deviceaddr->oda_osdname,
login.osdname, sizeof(login.osdname), "OSDNAME");
_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
rc = __objlayout_upcall(&login);
if (rc > 0) /* script returns positive values */
rc = -ENODEV;
return rc;
}

View file

@ -0,0 +1,184 @@
/*
* Data types and function declerations for interfacing with the
* pNFS standard object layout driver.
*
* Copyright (C) 2007 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _OBJLAYOUT_H
#define _OBJLAYOUT_H
#include <linux/nfs_fs.h>
#include <linux/pnfs_osd_xdr.h>
#include "../pnfs.h"
/*
* per-inode layout
*/
struct objlayout {
struct pnfs_layout_hdr pnfs_layout;
/* for layout_commit */
enum osd_delta_space_valid_enum {
OBJ_DSU_INIT = 0,
OBJ_DSU_VALID,
OBJ_DSU_INVALID,
} delta_space_valid;
s64 delta_space_used; /* consumed by write ops */
/* for layout_return */
spinlock_t lock;
struct list_head err_list;
};
static inline struct objlayout *
OBJLAYOUT(struct pnfs_layout_hdr *lo)
{
return container_of(lo, struct objlayout, pnfs_layout);
}
/*
* per-I/O operation state
* embedded in objects provider io_state data structure
*/
struct objlayout_io_res {
struct objlayout *objlay;
void *rpcdata;
int status; /* res */
int committed; /* res */
/* Error reporting (layout_return) */
struct list_head err_list;
unsigned num_comps;
/* Pointer to array of error descriptors of size num_comps.
* It should contain as many entries as devices in the osd_layout
* that participate in the I/O. It is up to the io_engine to allocate
* needed space and set num_comps.
*/
struct pnfs_osd_ioerr *ioerrs;
};
static inline
void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
struct pnfs_layout_hdr *pnfs_layout_type)
{
oir->objlay = OBJLAYOUT(pnfs_layout_type);
oir->rpcdata = rpcdata;
INIT_LIST_HEAD(&oir->err_list);
oir->num_comps = num_comps;
oir->ioerrs = ioerrs;
}
/*
* Raid engine I/O API
*/
extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct pnfs_layout_hdr *pnfslay,
struct pnfs_layout_range *range,
struct xdr_stream *xdr,
gfp_t gfp_flags);
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
/* objio_free_result will free these @oir structs received from
* objlayout_{read,write}_done
*/
extern void objio_free_result(struct objlayout_io_res *oir);
extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
/*
* callback API
*/
extern void objlayout_io_set_result(struct objlayout_io_res *oir,
unsigned index, struct pnfs_osd_objid *pooid,
int osd_error, u64 offset, u64 length, bool is_write);
static inline void
objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
{
/* If one of the I/Os errored out and the delta_space_used was
* invalid we render the complete report as invalid. Protocol mandate
* the DSU be accurate or not reported.
*/
spin_lock(&objlay->lock);
if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
objlay->delta_space_valid = OBJ_DSU_VALID;
objlay->delta_space_used += space_used;
}
spin_unlock(&objlay->lock);
}
extern void objlayout_read_done(struct objlayout_io_res *oir,
ssize_t status, bool sync);
extern void objlayout_write_done(struct objlayout_io_res *oir,
ssize_t status, bool sync);
/*
* exported generic objects function vectors
*/
extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
extern struct pnfs_layout_segment *objlayout_alloc_lseg(
struct pnfs_layout_hdr *,
struct nfs4_layoutget_res *,
gfp_t gfp_flags);
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
struct nfs_pgio_header *);
extern enum pnfs_try_status objlayout_write_pagelist(
struct nfs_pgio_header *,
int how);
extern void objlayout_encode_layoutcommit(
struct pnfs_layout_hdr *,
struct xdr_stream *,
const struct nfs4_layoutcommit_args *);
extern void objlayout_encode_layoutreturn(
struct pnfs_layout_hdr *,
struct xdr_stream *,
const struct nfs4_layoutreturn_args *);
extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
#endif /* _OBJLAYOUT_H */

View file

@ -0,0 +1,415 @@
/*
* Object-Based pNFS Layout XDR layer
*
* Copyright (C) 2007 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/pnfs_osd_xdr.h>
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* The following implementation is based on RFC5664
*/
/*
* struct pnfs_osd_objid {
* struct nfs4_deviceid oid_device_id;
* u64 oid_partition_id;
* u64 oid_object_id;
* }; // xdr size 32 bytes
*/
static __be32 *
_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
{
p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
sizeof(objid->oid_device_id.data));
p = xdr_decode_hyper(p, &objid->oid_partition_id);
p = xdr_decode_hyper(p, &objid->oid_object_id);
return p;
}
/*
* struct pnfs_osd_opaque_cred {
* u32 cred_len;
* void *cred;
* }; // xdr size [variable]
* The return pointers are from the xdr buffer
*/
static int
_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
struct xdr_stream *xdr)
{
__be32 *p = xdr_inline_decode(xdr, 1);
if (!p)
return -EINVAL;
opaque_cred->cred_len = be32_to_cpu(*p++);
p = xdr_inline_decode(xdr, opaque_cred->cred_len);
if (!p)
return -EINVAL;
opaque_cred->cred = p;
return 0;
}
/*
* struct pnfs_osd_object_cred {
* struct pnfs_osd_objid oc_object_id;
* u32 oc_osd_version;
* u32 oc_cap_key_sec;
* struct pnfs_osd_opaque_cred oc_cap_key
* struct pnfs_osd_opaque_cred oc_cap;
* }; // xdr size 32 + 4 + 4 + [variable] + [variable]
*/
static int
_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
struct xdr_stream *xdr)
{
__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
int ret;
if (!p)
return -EIO;
p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
comp->oc_osd_version = be32_to_cpup(p++);
comp->oc_cap_key_sec = be32_to_cpup(p);
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
if (unlikely(ret))
return ret;
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
return ret;
}
/*
* struct pnfs_osd_data_map {
* u32 odm_num_comps;
* u64 odm_stripe_unit;
* u32 odm_group_width;
* u32 odm_group_depth;
* u32 odm_mirror_cnt;
* u32 odm_raid_algorithm;
* }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
*/
static inline int
_osd_data_map_xdr_sz(void)
{
return 4 + 8 + 4 + 4 + 4 + 4;
}
static __be32 *
_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
{
data_map->odm_num_comps = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
data_map->odm_group_width = be32_to_cpup(p++);
data_map->odm_group_depth = be32_to_cpup(p++);
data_map->odm_mirror_cnt = be32_to_cpup(p++);
data_map->odm_raid_algorithm = be32_to_cpup(p++);
dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
__func__,
data_map->odm_num_comps,
(unsigned long long)data_map->odm_stripe_unit,
data_map->odm_group_width,
data_map->odm_group_depth,
data_map->odm_mirror_cnt,
data_map->odm_raid_algorithm);
return p;
}
int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
{
__be32 *p;
memset(iter, 0, sizeof(*iter));
p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
if (unlikely(!p))
return -EINVAL;
p = _osd_xdr_decode_data_map(p, &layout->olo_map);
layout->olo_comps_index = be32_to_cpup(p++);
layout->olo_num_comps = be32_to_cpup(p++);
dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
layout->olo_comps_index, layout->olo_num_comps);
iter->total_comps = layout->olo_num_comps;
return 0;
}
bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
int *err)
{
BUG_ON(iter->decoded_comps > iter->total_comps);
if (iter->decoded_comps == iter->total_comps)
return false;
*err = _osd_xdr_decode_object_cred(comp, xdr);
if (unlikely(*err)) {
dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
"total_comps=%d\n", __func__, *err,
iter->decoded_comps, iter->total_comps);
return false; /* stop the loop */
}
dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
"key_len=%u cap_len=%u\n",
__func__,
_DEVID_LO(&comp->oc_object_id.oid_device_id),
_DEVID_HI(&comp->oc_object_id.oid_device_id),
comp->oc_object_id.oid_partition_id,
comp->oc_object_id.oid_object_id,
comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
iter->decoded_comps++;
return true;
}
/*
* Get Device Information Decoding
*
* Note: since Device Information is currently done synchronously, all
* variable strings fields are left inside the rpc buffer and are only
* pointed to by the pnfs_osd_deviceaddr members. So the read buffer
* should not be freed while the returned information is in use.
*/
/*
*struct nfs4_string {
* unsigned int len;
* char *data;
*}; // size [variable]
* NOTE: Returned string points to inside the XDR buffer
*/
static __be32 *
__read_u8_opaque(__be32 *p, struct nfs4_string *str)
{
str->len = be32_to_cpup(p++);
str->data = (char *)p;
p += XDR_QUADLEN(str->len);
return p;
}
/*
* struct pnfs_osd_targetid {
* u32 oti_type;
* struct nfs4_string oti_scsi_device_id;
* };// size 4 + [variable]
*/
static __be32 *
__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
{
u32 oti_type;
oti_type = be32_to_cpup(p++);
targetid->oti_type = oti_type;
switch (oti_type) {
case OBJ_TARGET_SCSI_NAME:
case OBJ_TARGET_SCSI_DEVICE_ID:
p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
}
return p;
}
/*
* struct pnfs_osd_net_addr {
* struct nfs4_string r_netid;
* struct nfs4_string r_addr;
* };
*/
static __be32 *
__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
{
p = __read_u8_opaque(p, &netaddr->r_netid);
p = __read_u8_opaque(p, &netaddr->r_addr);
return p;
}
/*
* struct pnfs_osd_targetaddr {
* u32 ota_available;
* struct pnfs_osd_net_addr ota_netaddr;
* };
*/
static __be32 *
__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
{
u32 ota_available;
ota_available = be32_to_cpup(p++);
targetaddr->ota_available = ota_available;
if (ota_available)
p = __read_net_addr(p, &targetaddr->ota_netaddr);
return p;
}
/*
* struct pnfs_osd_deviceaddr {
* struct pnfs_osd_targetid oda_targetid;
* struct pnfs_osd_targetaddr oda_targetaddr;
* u8 oda_lun[8];
* struct nfs4_string oda_systemid;
* struct pnfs_osd_object_cred oda_root_obj_cred;
* struct nfs4_string oda_osdname;
* };
*/
/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
* not have an xdr_stream
*/
static __be32 *
__read_opaque_cred(__be32 *p,
struct pnfs_osd_opaque_cred *opaque_cred)
{
opaque_cred->cred_len = be32_to_cpu(*p++);
opaque_cred->cred = p;
return p + XDR_QUADLEN(opaque_cred->cred_len);
}
static __be32 *
__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
{
p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
comp->oc_osd_version = be32_to_cpup(p++);
comp->oc_cap_key_sec = be32_to_cpup(p++);
p = __read_opaque_cred(p, &comp->oc_cap_key);
p = __read_opaque_cred(p, &comp->oc_cap);
return p;
}
void pnfs_osd_xdr_decode_deviceaddr(
struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
{
p = __read_targetid(p, &deviceaddr->oda_targetid);
p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
sizeof(deviceaddr->oda_lun));
p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
p = __read_u8_opaque(p, &deviceaddr->oda_osdname);
/* libosd likes this terminated in dbg. It's last, so no problems */
deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
}
/*
* struct pnfs_osd_layoutupdate {
* u32 dsu_valid;
* s64 dsu_delta;
* u32 olu_ioerr_flag;
* }; xdr size 4 + 8 + 4
*/
int
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
struct pnfs_osd_layoutupdate *lou)
{
__be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4);
if (!p)
return -E2BIG;
*p++ = cpu_to_be32(lou->dsu_valid);
if (lou->dsu_valid)
p = xdr_encode_hyper(p, lou->dsu_delta);
*p++ = cpu_to_be32(lou->olu_ioerr_flag);
return 0;
}
/*
* struct pnfs_osd_objid {
* struct nfs4_deviceid oid_device_id;
* u64 oid_partition_id;
* u64 oid_object_id;
* }; // xdr size 32 bytes
*/
static inline __be32 *
pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
{
p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
sizeof(object_id->oid_device_id.data));
p = xdr_encode_hyper(p, object_id->oid_partition_id);
p = xdr_encode_hyper(p, object_id->oid_object_id);
return p;
}
/*
* struct pnfs_osd_ioerr {
* struct pnfs_osd_objid oer_component;
* u64 oer_comp_offset;
* u64 oer_comp_length;
* u32 oer_iswrite;
* u32 oer_errno;
* }; // xdr size 32 + 24 bytes
*/
void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
{
p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
p = xdr_encode_hyper(p, ioerr->oer_comp_length);
*p++ = cpu_to_be32(ioerr->oer_iswrite);
*p = cpu_to_be32(ioerr->oer_errno);
}
__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, 32 + 24);
if (unlikely(!p))
dprintk("%s: out of xdr space\n", __func__);
return p;
}

1104
fs/nfs/pagelist.c Normal file

File diff suppressed because it is too large Load diff

1979
fs/nfs/pnfs.c Normal file

File diff suppressed because it is too large Load diff

573
fs/nfs/pnfs.h Normal file
View file

@ -0,0 +1,573 @@
/*
* pNFS client data structures.
*
* Copyright (c) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand <dhildebz@umich.edu>
*
* Permission is granted to use, copy, create derivative works, and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* This software is provided as is, without representation or warranty
* of any kind either express or implied, including without limitation
* the implied warranties of merchantability, fitness for a particular
* purpose, or noninfringement. The Regents of the University of
* Michigan shall not be liable for any damages, including special,
* indirect, incidental, or consequential damages, with respect to any
* claim arising out of or in connection with the use of the software,
* even if it has been or is hereafter advised of the possibility of
* such damages.
*/
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/workqueue.h>
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
NFS_LSEG_ROC, /* roc bit received from server */
NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */
};
struct pnfs_layout_segment {
struct list_head pls_list;
struct list_head pls_lc_list;
struct pnfs_layout_range pls_range;
atomic_t pls_refcount;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
struct work_struct pls_work;
};
enum pnfs_try_status {
PNFS_ATTEMPTED = 0,
PNFS_NOT_ATTEMPTED = 1,
};
#ifdef CONFIG_NFS_V4_1
#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_ROC, /* some lseg had roc bit set */
NFS_LAYOUT_RETURN, /* Return this layout ASAP */
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
};
enum layoutdriver_policy_flags {
/* Should the pNFS client commit and return the layout upon truncate to
* a smaller size */
PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
PNFS_LAYOUTRET_ON_ERROR = 1 << 1,
PNFS_READ_WHOLE_PAGE = 1 << 2,
};
struct nfs4_deviceid_node;
/* Per-layout driver specific registration structure */
struct pnfs_layoutdriver_type {
struct list_head pnfs_tblid;
const u32 id;
const char *name;
struct module *owner;
unsigned flags;
unsigned max_deviceinfo_size;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
int (*clear_layoutdriver) (struct nfs_server *);
struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags);
void (*free_layout_hdr) (struct pnfs_layout_hdr *);
struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
void (*free_lseg) (struct pnfs_layout_segment *lseg);
void (*return_range) (struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range);
/* test for nfs page cache coalescing */
const struct nfs_pageio_ops *pg_read_ops;
const struct nfs_pageio_ops *pg_write_ops;
struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
void (*mark_request_commit) (struct nfs_page *req,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
void (*clear_request_commit) (struct nfs_page *req,
struct nfs_commit_info *cinfo);
int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
struct page *page);
int (*commit_pagelist)(struct inode *inode,
struct list_head *mds_pages,
int how,
struct nfs_commit_info *cinfo);
/*
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
struct nfs4_deviceid_node * (*alloc_deviceid_node)
(struct nfs_server *server, struct pnfs_device *pdev,
gfp_t gfp_flags);
void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
struct xdr_stream *xdr,
const struct nfs4_layoutreturn_args *args);
void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
};
struct pnfs_layout_hdr {
atomic_t plh_refcount;
struct list_head plh_layouts; /* other client layouts */
struct list_head plh_bulk_destroy;
struct list_head plh_segs; /* layout segments list */
nfs4_stateid plh_stateid;
atomic_t plh_outstanding; /* number of RPCs out */
unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
u32 plh_barrier; /* ignore lower seqids */
unsigned long plh_retry_timestamp;
unsigned long plh_flags;
loff_t plh_lwb; /* last write byte for layoutcommit */
struct rpc_cred *plh_lc_cred; /* layoutcommit cred */
struct inode *plh_inode;
};
struct pnfs_device {
struct nfs4_deviceid dev_id;
unsigned int layout_type;
unsigned int mincount;
unsigned int maxcount; /* gdia_maxcount */
struct page **pages;
unsigned int pgbase;
unsigned int pglen; /* reply buffer length */
};
#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
struct pnfs_devicelist {
unsigned int eof;
unsigned int num_devs;
struct nfs4_deviceid dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
};
extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
/* nfs4proc.c */
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
struct rpc_cred *cred);
extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
struct nfs_fsid *fsid,
bool is_recall);
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state);
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
void pnfs_set_layoutcommit(struct nfs_pgio_header *);
void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_pgio_header *);
void pnfs_ld_read_done(struct nfs_pgio_header *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
u64 count,
enum pnfs_iomode iomode,
gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */
};
/* pnfs_dev.c */
struct nfs4_deviceid_node {
struct hlist_node node;
struct hlist_node tmpnode;
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
unsigned long flags;
unsigned long timestamp_unavailable;
struct nfs4_deviceid deviceid;
atomic_t ref;
};
struct nfs4_deviceid_node *
nfs4_find_get_deviceid(struct nfs_server *server,
const struct nfs4_deviceid *id, struct rpc_cred *cred,
gfp_t gfp_mask);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
const struct nfs4_deviceid *);
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
void nfs4_deviceid_purge_client(const struct nfs_client *);
static inline struct nfs4_deviceid_node *
nfs4_get_deviceid(struct nfs4_deviceid_node *d)
{
atomic_inc(&d->ref);
return d;
}
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
atomic_inc(&lseg->pls_refcount);
smp_mb__after_atomic();
}
return lseg;
}
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
return nfss->pnfs_curr_ld != NULL;
}
static inline int
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
{
if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
return PNFS_NOT_ATTEMPTED;
return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
}
static inline struct pnfs_ds_commit_info *
pnfs_get_ds_info(struct inode *inode)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->get_ds_info == NULL)
return NULL;
return ld->get_ds_info(inode);
}
static inline bool
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (lseg == NULL || ld->mark_request_commit == NULL)
return false;
ld->mark_request_commit(req, lseg, cinfo);
return true;
}
static inline bool
pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->clear_request_commit == NULL)
return false;
ld->clear_request_commit(req, cinfo);
return true;
}
static inline int
pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
int max)
{
if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
return 0;
else
return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
}
static inline void
pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
struct nfs_commit_info *cinfo)
{
if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
return;
NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
}
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
struct page *page)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->search_commit_reqs == NULL)
return NULL;
return ld->search_commit_reqs(cinfo, page);
}
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
if (!pnfs_enabled_sb(NFS_SERVER(inode)))
return false;
return NFS_SERVER(inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_SETATTR;
}
static inline bool
pnfs_ld_read_whole_page(struct inode *inode)
{
if (!pnfs_enabled_sb(NFS_SERVER(inode)))
return false;
return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE;
}
static inline bool
pnfs_layoutcommit_outstanding(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags) != 0 ||
test_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags) != 0;
}
static inline int pnfs_return_layout(struct inode *ino)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *nfss = NFS_SERVER(ino);
if (pnfs_enabled_sb(nfss) && nfsi->layout)
return _pnfs_return_layout(ino);
return 0;
}
static inline bool
pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
struct nfs_server *nfss)
{
return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld &&
nfss->pnfs_curr_ld->id == src->l_type);
}
#ifdef NFS_DEBUG
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
#else
static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id)
{
}
#endif /* NFS_DEBUG */
#else /* CONFIG_NFS_V4_1 */
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
{
}
static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
}
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
return NULL;
}
static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
}
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
}
static inline int pnfs_commit_and_return_layout(struct inode *inode)
{
return 0;
}
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
return false;
}
static inline bool
pnfs_ld_read_whole_page(struct inode *inode)
{
return false;
}
static inline bool
pnfs_roc(struct inode *ino)
{
return false;
}
static inline void
pnfs_roc_release(struct inode *ino)
{
}
static inline void
pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
}
static inline bool
pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
{
return false;
}
static inline void set_pnfs_layoutdriver(struct nfs_server *s,
const struct nfs_fh *mntfh, u32 id)
{
}
static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
static inline int
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
{
return PNFS_NOT_ATTEMPTED;
}
static inline struct pnfs_ds_commit_info *
pnfs_get_ds_info(struct inode *inode)
{
return NULL;
}
static inline bool
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
return false;
}
static inline bool
pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
return false;
}
static inline int
pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
int max)
{
return 0;
}
static inline void
pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
struct nfs_commit_info *cinfo)
{
}
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
struct page *page)
{
return NULL;
}
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
}
static inline bool
pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
struct nfs_server *nfss)
{
return false;
}
static inline bool
pnfs_layoutcommit_outstanding(struct inode *inode)
{
return false;
}
static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
return NULL;
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */

360
fs/nfs/pnfs_dev.c Normal file
View file

@ -0,0 +1,360 @@
/*
* Device operations for the pnfs client.
*
* Copyright (c) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand <dhildebz@umich.edu>
* Garth Goodson <Garth.Goodson@netapp.com>
*
* Permission is granted to use, copy, create derivative works, and
* redistribute this software and such derivative works for any purpose,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific, written prior authorization. If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software, then the disclaimer below must also be included.
*
* This software is provided as is, without representation or warranty
* of any kind either express or implied, including without limitation
* the implied warranties of merchantability, fitness for a particular
* purpose, or noninfringement. The Regents of the University of
* Michigan shall not be liable for any damages, including special,
* indirect, incidental, or consequential damages, with respect to any
* claim arising out of or in connection with the use of the software,
* even if it has been or is hereafter advised of the possibility of
* such damages.
*/
#include <linux/export.h>
#include <linux/nfs_fs.h>
#include "nfs4session.h"
#include "internal.h"
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
/*
* Device ID RCU cache. A device ID is unique per server and layout type.
*/
#define NFS4_DEVICE_ID_HASH_BITS 5
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(nfs4_deviceid_lock);
#ifdef NFS_DEBUG
void
nfs4_print_deviceid(const struct nfs4_deviceid *id)
{
u32 *p = (u32 *)id;
dprintk("%s: device id= [%x%x%x%x]\n", __func__,
p[0], p[1], p[2], p[3]);
}
EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
#endif
static inline u32
nfs4_deviceid_hash(const struct nfs4_deviceid *id)
{
unsigned char *cptr = (unsigned char *)id->data;
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
u32 x = 0;
while (nbytes--) {
x *= 37;
x += *cptr++;
}
return x & NFS4_DEVICE_ID_HASH_MASK;
}
static struct nfs4_deviceid_node *
_lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
const struct nfs_client *clp, const struct nfs4_deviceid *id,
long hash)
{
struct nfs4_deviceid_node *d;
hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->ld == ld && d->nfs_client == clp &&
!memcmp(&d->deviceid, id, sizeof(*id))) {
if (atomic_read(&d->ref))
return d;
else
continue;
}
return NULL;
}
static struct nfs4_deviceid_node *
nfs4_get_device_info(struct nfs_server *server,
const struct nfs4_deviceid *dev_id,
struct rpc_cred *cred, gfp_t gfp_flags)
{
struct nfs4_deviceid_node *d = NULL;
struct pnfs_device *pdev = NULL;
struct page **pages = NULL;
u32 max_resp_sz;
int max_pages;
int rc, i;
/*
* Use the session max response size as the basis for setting
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
if (server->pnfs_curr_ld->max_deviceinfo_size &&
server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
__func__, server, max_resp_sz, max_pages);
pdev = kzalloc(sizeof(*pdev), gfp_flags);
if (!pdev)
return NULL;
pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (!pages)
goto out_free_pdev;
for (i = 0; i < max_pages; i++) {
pages[i] = alloc_page(gfp_flags);
if (!pages[i])
goto out_free_pages;
}
memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
pdev->layout_type = server->pnfs_curr_ld->id;
pdev->pages = pages;
pdev->pgbase = 0;
pdev->pglen = max_resp_sz;
pdev->mincount = 0;
pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
rc = nfs4_proc_getdeviceinfo(server, pdev, cred);
dprintk("%s getdevice info returns %d\n", __func__, rc);
if (rc)
goto out_free_pages;
/*
* Found new device, need to decode it and then add it to the
* list of known devices for this mountpoint.
*/
d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev,
gfp_flags);
out_free_pages:
for (i = 0; i < max_pages; i++)
__free_page(pages[i]);
kfree(pages);
out_free_pdev:
kfree(pdev);
dprintk("<-- %s d %p\n", __func__, d);
return d;
}
/*
* Lookup a deviceid in cache and get a reference count on it if found
*
* @clp nfs_client associated with deviceid
* @id deviceid to look up
*/
static struct nfs4_deviceid_node *
__nfs4_find_get_deviceid(struct nfs_server *server,
const struct nfs4_deviceid *id, long hash)
{
struct nfs4_deviceid_node *d;
rcu_read_lock();
d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id,
hash);
if (d != NULL)
atomic_inc(&d->ref);
rcu_read_unlock();
return d;
}
struct nfs4_deviceid_node *
nfs4_find_get_deviceid(struct nfs_server *server,
const struct nfs4_deviceid *id, struct rpc_cred *cred,
gfp_t gfp_mask)
{
long hash = nfs4_deviceid_hash(id);
struct nfs4_deviceid_node *d, *new;
d = __nfs4_find_get_deviceid(server, id, hash);
if (d)
return d;
new = nfs4_get_device_info(server, id, cred, gfp_mask);
if (!new)
return new;
spin_lock(&nfs4_deviceid_lock);
d = __nfs4_find_get_deviceid(server, id, hash);
if (d) {
spin_unlock(&nfs4_deviceid_lock);
server->pnfs_curr_ld->free_deviceid_node(new);
return d;
}
hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
atomic_inc(&new->ref);
spin_unlock(&nfs4_deviceid_lock);
return new;
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
/*
* Remove a deviceid from cache
*
* @clp nfs_client associated with deviceid
* @id the deviceid to unhash
*
* @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
*/
void
nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
const struct nfs_client *clp, const struct nfs4_deviceid *id)
{
struct nfs4_deviceid_node *d;
spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id));
rcu_read_unlock();
if (!d) {
spin_unlock(&nfs4_deviceid_lock);
return;
}
hlist_del_init_rcu(&d->node);
spin_unlock(&nfs4_deviceid_lock);
synchronize_rcu();
/* balance the initial ref set in pnfs_insert_deviceid */
if (atomic_dec_and_test(&d->ref))
d->ld->free_deviceid_node(d);
}
EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
void
nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, struct nfs_server *server,
const struct nfs4_deviceid *id)
{
INIT_HLIST_NODE(&d->node);
INIT_HLIST_NODE(&d->tmpnode);
d->ld = server->pnfs_curr_ld;
d->nfs_client = server->nfs_client;
d->flags = 0;
d->deviceid = *id;
atomic_set(&d->ref, 1);
}
EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);
/*
* Dereference a deviceid node and delete it when its reference count drops
* to zero.
*
* @d deviceid node to put
*
* return true iff the node was deleted
* Note that since the test for d->ref == 0 is sufficient to establish
* that the node is no longer hashed in the global device id cache.
*/
bool
nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
{
if (!atomic_dec_and_test(&d->ref))
return false;
d->ld->free_deviceid_node(d);
return true;
}
EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
void
nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
{
node->timestamp_unavailable = jiffies;
set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
}
EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
bool
nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
{
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
unsigned long start, end;
end = jiffies;
start = end - PNFS_DEVICE_RETRY_TIMEOUT;
if (time_in_range(node->timestamp_unavailable, start, end))
return true;
clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
}
return false;
}
EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable);
static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
struct nfs4_deviceid_node *d;
HLIST_HEAD(tmp);
spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->nfs_client == clp && atomic_read(&d->ref)) {
hlist_del_init_rcu(&d->node);
hlist_add_head(&d->tmpnode, &tmp);
}
rcu_read_unlock();
spin_unlock(&nfs4_deviceid_lock);
if (hlist_empty(&tmp))
return;
synchronize_rcu();
while (!hlist_empty(&tmp)) {
d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
hlist_del(&d->tmpnode);
if (atomic_dec_and_test(&d->ref))
d->ld->free_deviceid_node(d);
}
}
void
nfs4_deviceid_purge_client(const struct nfs_client *clp)
{
long h;
if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
return;
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
_deviceid_purge_client(clp, h);
}
/*
* Stop use of all deviceids associated with an nfs_client
*/
void
nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
{
struct nfs4_deviceid_node *d;
int i;
rcu_read_lock();
for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
if (d->nfs_client == clp)
set_bit(NFS_DEVICEID_INVALID, &d->flags);
}
rcu_read_unlock();
}

751
fs/nfs/proc.c Normal file
View file

@ -0,0 +1,751 @@
/*
* linux/fs/nfs/proc.c
*
* Copyright (C) 1992, 1993, 1994 Rick Sladkey
*
* OS-independent nfs remote procedure call functions
*
* Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
* so at last we can have decent(ish) throughput off a
* Sun server.
*
* Coding optimized and cleaned up by Florian La Roche.
* Note: Error returns are optimized for NFS_OK, which isn't translated via
* nfs_stat_to_errno(), but happens to be already the right return code.
*
* Also, the code currently doesn't check the size of the packet, when
* it decodes the packet.
*
* Feel free to fix it and mail me the diffs if it worries you.
*
* Completely rewritten to support the new RPC call interface;
* rewrote and moved the entire XDR stuff to xdr.c
* --Olaf Kirch June 1996
*
* The code below initializes all auto variables explicitly, otherwise
* it will fail to work as a module (gcc generates a memset call for an
* incomplete struct).
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/lockd/bind.h>
#include <linux/freezer.h>
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PROC
/*
* Bare-bones access to getattr: this is for nfs_read_super.
*/
static int
nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
.rpc_argp = fhandle,
.rpc_resp = fattr,
};
int status;
dprintk("%s: call getattr\n", __func__);
nfs_fattr_init(fattr);
status = rpc_call_sync(server->client, &msg, 0);
/* Retry with default authentication if different */
if (status && server->nfs_client->cl_rpcclient != server->client)
status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
dprintk("%s: reply getattr: %d\n", __func__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __func__);
msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
msg.rpc_resp = &fsinfo;
status = rpc_call_sync(server->client, &msg, 0);
/* Retry with default authentication if different */
if (status && server->nfs_client->cl_rpcclient != server->client)
status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
dprintk("%s: reply statfs: %d\n", __func__, status);
if (status)
return status;
info->rtmax = NFS_MAXDATA;
info->rtpref = fsinfo.tsize;
info->rtmult = fsinfo.bsize;
info->wtmax = NFS_MAXDATA;
info->wtpref = fsinfo.tsize;
info->wtmult = fsinfo.bsize;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
return 0;
}
/*
* One function for each procedure in the NFS protocol.
*/
static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
.rpc_argp = fhandle,
.rpc_resp = fattr,
};
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
static int
nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct iattr *sattr)
{
struct inode *inode = dentry->d_inode;
struct nfs_sattrargs arg = {
.fh = NFS_FH(inode),
.sattr = sattr
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
.rpc_argp = &arg,
.rpc_resp = fattr,
};
int status;
/* Mask out the non-modebit related stuff from attr->ia_mode */
sattr->ia_mode &= S_IALLUGO;
dprintk("NFS call setattr\n");
if (sattr->ia_valid & ATTR_FILE)
msg.rpc_cred = nfs_file_cred(sattr->ia_file);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
nfs_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
.fattr = fattr
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
.rpc_argp = &arg,
.rpc_resp = &res,
};
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
static int nfs_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
struct nfs_readlinkargs args = {
.fh = NFS_FH(inode),
.pgbase = pgbase,
.pglen = pglen,
.pages = &page
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READLINK],
.rpc_argp = &args,
};
int status;
dprintk("NFS call readlink\n");
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
struct nfs_createdata {
struct nfs_createargs arg;
struct nfs_diropok res;
struct nfs_fh fhandle;
struct nfs_fattr fattr;
};
static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
struct dentry *dentry, struct iattr *sattr)
{
struct nfs_createdata *data;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data != NULL) {
data->arg.fh = NFS_FH(dir);
data->arg.name = dentry->d_name.name;
data->arg.len = dentry->d_name.len;
data->arg.sattr = sattr;
nfs_fattr_init(&data->fattr);
data->fhandle.size = 0;
data->res.fh = &data->fhandle;
data->res.fattr = &data->fattr;
}
return data;
};
static void nfs_free_createdata(const struct nfs_createdata *data)
{
kfree(data);
}
static int
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_createdata *data;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_CREATE],
};
int status = -ENOMEM;
dprintk("NFS call create %pd\n", dentry);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply create: %d\n", status);
return status;
}
/*
* In NFSv2, mknod is grafted onto the create call.
*/
static int
nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
struct nfs_createdata *data;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_CREATE],
};
umode_t mode;
int status = -ENOMEM;
dprintk("NFS call mknod %pd\n", dentry);
mode = sattr->ia_mode;
if (S_ISFIFO(mode)) {
sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
sattr->ia_valid &= ~ATTR_SIZE;
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
sattr->ia_valid |= ATTR_SIZE;
sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
}
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
nfs_fattr_init(data->res.fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply mknod: %d\n", status);
return status;
}
static int
nfs_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
.name = *name,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
};
int status;
dprintk("NFS call remove %s\n", name->name);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply remove: %d\n", status);
return status;
}
static void
nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
}
static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
{
rpc_call_start(task);
}
static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
{
nfs_mark_for_revalidate(dir);
return 1;
}
static void
nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
}
static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
{
rpc_call_start(task);
}
static int
nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
struct inode *new_dir)
{
nfs_mark_for_revalidate(old_dir);
nfs_mark_for_revalidate(new_dir);
return 1;
}
static int
nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_linkargs arg = {
.fromfh = NFS_FH(inode),
.tofh = NFS_FH(dir),
.toname = name->name,
.tolen = name->len
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_LINK],
.rpc_argp = &arg,
};
int status;
dprintk("NFS call link %s\n", name->name);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_mark_for_revalidate(inode);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply link: %d\n", status);
return status;
}
static int
nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
unsigned int len, struct iattr *sattr)
{
struct nfs_fh *fh;
struct nfs_fattr *fattr;
struct nfs_symlinkargs arg = {
.fromfh = NFS_FH(dir),
.fromname = dentry->d_name.name,
.fromlen = dentry->d_name.len,
.pages = &page,
.pathlen = len,
.sattr = sattr
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
.rpc_argp = &arg,
};
int status = -ENAMETOOLONG;
dprintk("NFS call symlink %pd\n", dentry);
if (len > NFS2_MAXPATHLEN)
goto out;
fh = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
status = -ENOMEM;
if (fh == NULL || fattr == NULL)
goto out_free;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
/*
* V2 SYMLINK requests don't return any attributes. Setting the
* filehandle size to zero indicates to nfs_instantiate that it
* should fill in the data with a LOOKUP call on the wire.
*/
if (status == 0)
status = nfs_instantiate(dentry, fh, fattr, NULL);
out_free:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
out:
dprintk("NFS reply symlink: %d\n", status);
return status;
}
static int
nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct nfs_createdata *data;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
};
int status = -ENOMEM;
dprintk("NFS call mkdir %pd\n", dentry);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply mkdir: %d\n", status);
return status;
}
static int
nfs_proc_rmdir(struct inode *dir, struct qstr *name)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
.rpc_argp = &arg,
};
int status;
dprintk("NFS call rmdir %s\n", name->name);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply rmdir: %d\n", status);
return status;
}
/*
* The READDIR implementation is somewhat hackish - we pass a temporary
* buffer to the encode function, which installs it in the receive
* the receive iovec. The decode function just parses the reply to make
* sure it is syntactically correct; the entries itself are decoded
* from nfs_readdir by calling the decode_entry function directly.
*/
static int
nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
struct nfs_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
.pages = pages,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
.rpc_cred = cred,
};
int status;
dprintk("NFS call readdir %d\n", (unsigned int)cookie);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_invalidate_atime(dir);
dprintk("NFS reply readdir: %d\n", status);
return status;
}
static int
nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_STATFS],
.rpc_argp = fhandle,
.rpc_resp = &fsinfo,
};
int status;
dprintk("NFS call statfs\n");
nfs_fattr_init(stat->fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize;
stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
stat->tfiles = 0;
stat->ffiles = 0;
stat->afiles = 0;
out:
return status;
}
static int
nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_STATFS],
.rpc_argp = fhandle,
.rpc_resp = &fsinfo,
};
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
info->rtmax = NFS_MAXDATA;
info->rtpref = fsinfo.tsize;
info->rtmult = fsinfo.bsize;
info->wtmax = NFS_MAXDATA;
info->wtpref = fsinfo.tsize;
info->wtmult = fsinfo.bsize;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
out:
return status;
}
static int
nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
info->max_link = 0;
info->max_namelen = NFS2_MAXNAMLEN;
return 0;
}
static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
nfs_invalidate_atime(inode);
if (task->tk_status >= 0) {
nfs_refresh_inode(inode, hdr->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
hdr->res.eof = 1;
}
return 0;
}
static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
if (task->tk_status >= 0)
nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
hdr->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
BUG();
}
static void
nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
BUG();
}
static int
nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = file_inode(filp);
return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
/* Helper functions for NFS lock bounds checking */
#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL)
static int nfs_lock_check_bounds(const struct file_lock *fl)
{
__s32 start, end;
start = (__s32)fl->fl_start;
if ((loff_t)start != fl->fl_start)
goto out_einval;
if (fl->fl_end != OFFSET_MAX) {
end = (__s32)fl->fl_end;
if ((loff_t)end != fl->fl_end)
goto out_einval;
} else
end = NFS_LOCK32_OFFSET_MAX;
if (start < 0 || start > end)
goto out_einval;
return 0;
out_einval:
return -EINVAL;
}
static int nfs_have_delegation(struct inode *inode, fmode_t flags)
{
return 0;
}
static int nfs_return_delegation(struct inode *inode)
{
nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
.mkdir = nfs_mkdir,
.rmdir = nfs_rmdir,
.mknod = nfs_mknod,
.rename = nfs_rename,
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
static const struct inode_operations nfs_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
const struct nfs_rpc_ops nfs_v2_clientops = {
.version = 2, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.file_inode_ops = &nfs_file_inode_operations,
.file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
.submount = nfs_submount,
.try_mount = nfs_try_mount,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
.access = NULL, /* access */
.readlink = nfs_proc_readlink,
.create = nfs_proc_create,
.remove = nfs_proc_remove,
.unlink_setup = nfs_proc_unlink_setup,
.unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
.unlink_done = nfs_proc_unlink_done,
.rename_setup = nfs_proc_rename_setup,
.rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
.rename_done = nfs_proc_rename_done,
.link = nfs_proc_link,
.symlink = nfs_proc_symlink,
.mkdir = nfs_proc_mkdir,
.rmdir = nfs_proc_rmdir,
.readdir = nfs_proc_readdir,
.mknod = nfs_proc_mknod,
.statfs = nfs_proc_statfs,
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs2_decode_dirent,
.pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
.read_setup = nfs_proc_read_setup,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.have_delegation = nfs_have_delegation,
.return_delegation = nfs_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
.create_server = nfs_create_server,
.clone_server = nfs_clone_server,
};

424
fs/nfs/read.c Normal file
View file

@ -0,0 +1,424 @@
/*
* linux/fs/nfs/read.c
*
* Block I/O for NFS
*
* Partial copy of Linus' read cache modifications to fs/nfs/file.c
* modified for async RPC by okir@monad.swb.de
*/
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
static
int nfs_return_empty_page(struct page *page)
{
zero_user(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
unlock_page(page);
return 0;
}
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
#ifdef CONFIG_NFS_V4_1
if (server->pnfs_curr_ld && !force_mds)
pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
struct nfs_page *new;
unsigned int len;
struct nfs_pageio_descriptor pgio;
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(ctx, page, NULL, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
}
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
return 0;
}
static void nfs_readpage_release(struct nfs_page *req)
{
struct inode *d_inode = req->wb_context->dentry->d_inode;
dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
(long long)req_offset(req));
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
if (PageUptodate(req->wb_page))
nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
unlock_page(req->wb_page);
}
nfs_release_request(req);
}
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
SetPageUptodate(req->wb_page);
}
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
unsigned long start = req->wb_pgbase;
unsigned long end = req->wb_pgbase + req->wb_bytes;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
/* note: regions of the page not covered by a
* request are zeroed in nfs_readpage_async /
* readpage_async_filler */
if (bytes > hdr->good_bytes) {
/* nothing in this request was good, so zero
* the full extent of the request */
zero_user_segment(page, start, end);
} else if (hdr->good_bytes - bytes < req->wb_bytes) {
/* part of this request has good bytes, but
* not all. zero the bad bytes */
start += hdr->good_bytes - bytes;
WARN_ON(start < req->wb_pgbase);
zero_user_segment(page, start, end);
}
}
bytes += req->wb_bytes;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (bytes <= hdr->good_bytes)
nfs_page_group_set_uptodate(req);
} else
nfs_page_group_set_uptodate(req);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
out:
hdr->release(hdr);
}
static void nfs_initiate_read(struct nfs_pgio_header *hdr,
struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = hdr->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
task_setup_data->flags |= swap_flags;
NFS_PROTO(inode)->read_setup(hdr, msg);
}
static void
nfs_async_read_error(struct list_head *head)
{
struct nfs_page *req;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
}
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
.error_cleanup = nfs_async_read_error,
.completion = nfs_read_completion,
};
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
static int nfs_readpage_done(struct rpc_task *task,
struct nfs_pgio_header *hdr,
struct inode *inode)
{
int status = NFS_PROTO(inode)->read_done(task, hdr);
if (status != 0)
return status;
nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
if (task->tk_status == -ESTALE) {
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
nfs_mark_for_revalidate(inode);
}
return 0;
}
static void nfs_readpage_retry(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
struct nfs_pgio_args *argp = &hdr->args;
struct nfs_pgio_res *resp = &hdr->res;
/* This is a short read! */
nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count == 0) {
nfs_set_pgio_error(hdr, -EIO, argp->offset);
return;
}
/* Yes, so retry the read at the end of the hdr */
hdr->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call_prepare(task);
}
static void nfs_readpage_result(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
if (hdr->res.eof) {
loff_t bound;
bound = hdr->args.offset + hdr->res.count;
spin_lock(&hdr->lock);
if (bound < hdr->io_start + hdr->good_bytes) {
set_bit(NFS_IOHDR_EOF, &hdr->flags);
clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
hdr->good_bytes = bound - hdr->io_start;
}
spin_unlock(&hdr->lock);
} else if (hdr->res.count != hdr->args.count)
nfs_readpage_retry(task, hdr);
}
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
* - The error flag is set for this page. This happens only when a
* previous async read operation failed.
*/
int nfs_readpage(struct file *file, struct page *page)
{
struct nfs_open_context *ctx;
struct inode *inode = page_file_mapping(page)->host;
int error;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page_file_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
nfs_add_stats(inode, NFSIOS_READPAGES, 1);
/*
* Try to flush any pending writes to the file..
*
* NOTE! Because we own the page lock, there cannot
* be any new pending writes generated at this point
* for this page (other pages can be written to).
*/
error = nfs_wb_page(inode, page);
if (error)
goto out_unlock;
if (PageUptodate(page))
goto out_unlock;
error = -ESTALE;
if (NFS_STALE(inode))
goto out_unlock;
if (file == NULL) {
error = -EBADF;
ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (ctx == NULL)
goto out_unlock;
} else
ctx = get_nfs_open_context(nfs_file_open_context(file));
if (!IS_SYNC(inode)) {
error = nfs_readpage_from_fscache(ctx, inode, page);
if (error == 0)
goto out;
}
error = nfs_readpage_async(ctx, inode, page);
out:
put_nfs_open_context(ctx);
return error;
out_unlock:
unlock_page(page);
return error;
}
struct nfs_readdesc {
struct nfs_pageio_descriptor *pgio;
struct nfs_open_context *ctx;
};
static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
struct nfs_page *new;
unsigned int len;
int error;
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(desc->ctx, page, NULL, 0, len);
if (IS_ERR(new))
goto out_error;
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) {
error = desc->pgio->pg_error;
goto out_unlock;
}
return 0;
out_error:
error = PTR_ERR(new);
out_unlock:
unlock_page(page);
return error;
}
int nfs_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct nfs_pageio_descriptor pgio;
struct nfs_readdesc desc = {
.pgio = &pgio,
};
struct inode *inode = mapping->host;
unsigned long npages;
int ret = -ESTALE;
dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode),
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
if (NFS_STALE(inode))
goto out;
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (desc.ctx == NULL)
return -EBADF;
} else
desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
/* attempt to read as many of the pages as possible from the cache
* - this returns -ENOBUFS immediately if the cookie is negative
*/
ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
pages, &nr_pages);
if (ret == 0)
goto read_complete; /* all pages were read */
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
put_nfs_open_context(desc.ctx);
out:
return ret;
}
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_readpagecache(void)
{
kmem_cache_destroy(nfs_rdata_cachep);
}
static const struct nfs_rw_ops nfs_rw_read_ops = {
.rw_mode = FMODE_READ,
.rw_alloc_header = nfs_readhdr_alloc,
.rw_free_header = nfs_readhdr_free,
.rw_done = nfs_readpage_done,
.rw_result = nfs_readpage_result,
.rw_initiate = nfs_initiate_read,
};

2891
fs/nfs/super.c Normal file

File diff suppressed because it is too large Load diff

78
fs/nfs/symlink.c Normal file
View file

@ -0,0 +1,78 @@
/*
* linux/fs/nfs/symlink.c
*
* Copyright (C) 1992 Rick Sladkey
*
* Optimization changes Copyright (C) 1994 Florian La Roche
*
* Jun 7 1999, cache symlink lookups in the page cache. -DaveM
*
* nfs symlink handling code
*/
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/pagemap.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/namei.h>
/* Symlink caching in the page cache is even more simplistic
* and straight-forward than readdir caching.
*/
static int nfs_symlink_filler(struct inode *inode, struct page *page)
{
int error;
error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
if (error < 0)
goto error;
SetPageUptodate(page);
unlock_page(page);
return 0;
error:
SetPageError(page);
unlock_page(page);
return -EIO;
}
static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct page *page;
void *err;
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
goto read_failed;
page = read_cache_page(&inode->i_data, 0,
(filler_t *)nfs_symlink_filler, inode);
if (IS_ERR(page)) {
err = page;
goto read_failed;
}
nd_set_link(nd, kmap(page));
return page;
read_failed:
nd_set_link(nd, err);
return NULL;
}
/*
* symlinks can't do much...
*/
const struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = nfs_follow_link,
.put_link = page_put_link,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};

64
fs/nfs/sysctl.c Normal file
View file

@ -0,0 +1,64 @@
/*
* linux/fs/nfs/sysctl.c
*
* Sysctl interface to NFS parameters
*/
#include <linux/types.h>
#include <linux/linkage.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nfs_fs.h>
static struct ctl_table_header *nfs_callback_sysctl_table;
static struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
.maxlen = sizeof(nfs_mountpoint_expiry_timeout),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nfs_congestion_kb",
.data = &nfs_congestion_kb,
.maxlen = sizeof(nfs_congestion_kb),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};
static struct ctl_table nfs_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
.child = nfs_cb_sysctls,
},
{ }
};
static struct ctl_table nfs_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = nfs_cb_sysctl_dir,
},
{ }
};
int nfs_register_sysctl(void)
{
nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root);
if (nfs_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
}
void nfs_unregister_sysctl(void)
{
unregister_sysctl_table(nfs_callback_sysctl_table);
nfs_callback_sysctl_table = NULL;
}

604
fs/nfs/unlink.c Normal file
View file

@ -0,0 +1,604 @@
/*
* linux/fs/nfs/unlink.c
*
* nfs sillydelete handling
*
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/dcache.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include "internal.h"
#include "nfs4_fs.h"
#include "iostat.h"
#include "delegation.h"
#include "nfstrace.h"
/**
* nfs_free_unlinkdata - release data from a sillydelete operation.
* @data: pointer to unlink structure.
*/
static void
nfs_free_unlinkdata(struct nfs_unlinkdata *data)
{
iput(data->dir);
put_rpccred(data->cred);
kfree(data->args.name.name);
kfree(data);
}
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
/**
* nfs_copy_dname - copy dentry name to data structure
* @dentry: pointer to dentry
* @data: nfs_unlinkdata
*/
static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
{
char *str;
int len = dentry->d_name.len;
str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL);
if (!str)
return -ENOMEM;
data->args.name.len = len;
data->args.name.name = str;
return 0;
}
static void nfs_free_dname(struct nfs_unlinkdata *data)
{
kfree(data->args.name.name);
data->args.name.name = NULL;
data->args.name.len = 0;
}
static void nfs_dec_sillycount(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
if (atomic_dec_return(&nfsi->silly_count) == 1)
wake_up(&nfsi->waitqueue);
}
/**
* nfs_async_unlink_done - Sillydelete post-processing
* @task: rpc_task of the sillydelete
*
* Do the directory attribute update.
*/
static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
{
struct nfs_unlinkdata *data = calldata;
struct inode *dir = data->dir;
trace_nfs_sillyrename_unlink(data, task->tk_status);
if (!NFS_PROTO(dir)->unlink_done(task, dir))
rpc_restart_call_prepare(task);
}
/**
* nfs_async_unlink_release - Release the sillydelete data.
* @task: rpc_task of the sillydelete
*
* We need to call nfs_put_unlinkdata as a 'tk_release' task since the
* rpc_task would be freed too.
*/
static void nfs_async_unlink_release(void *calldata)
{
struct nfs_unlinkdata *data = calldata;
struct super_block *sb = data->dir->i_sb;
nfs_dec_sillycount(data->dir);
nfs_free_unlinkdata(data);
nfs_sb_deactive(sb);
}
static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_unlinkdata *data = calldata;
NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
}
static const struct rpc_call_ops nfs_unlink_ops = {
.rpc_call_done = nfs_async_unlink_done,
.rpc_release = nfs_async_unlink_release,
.rpc_call_prepare = nfs_unlink_prepare,
};
static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
{
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_message = &msg,
.callback_ops = &nfs_unlink_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
struct rpc_task *task;
struct dentry *alias;
alias = d_lookup(parent, &data->args.name);
if (alias != NULL) {
int ret;
void *devname_garbage = NULL;
/*
* Hey, we raced with lookup... See if we need to transfer
* the sillyrename information to the aliased dentry.
*/
nfs_free_dname(data);
ret = nfs_copy_dname(alias, data);
spin_lock(&alias->d_lock);
if (ret == 0 && alias->d_inode != NULL &&
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
devname_garbage = alias->d_fsdata;
alias->d_fsdata = data;
alias->d_flags |= DCACHE_NFSFS_RENAMED;
ret = 1;
} else
ret = 0;
spin_unlock(&alias->d_lock);
nfs_dec_sillycount(dir);
dput(alias);
/*
* If we'd displaced old cached devname, free it. At that
* point dentry is definitely not a root, so we won't need
* that anymore.
*/
kfree(devname_garbage);
return ret;
}
data->dir = igrab(dir);
if (!data->dir) {
nfs_dec_sillycount(dir);
return 0;
}
nfs_sb_active(dir->i_sb);
data->args.fh = NFS_FH(dir);
nfs_fattr_init(data->res.dir_attr);
NFS_PROTO(dir)->unlink_setup(&msg, dir);
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
rpc_put_task_async(task);
return 1;
}
static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
{
struct dentry *parent;
struct inode *dir;
int ret = 0;
parent = dget_parent(dentry);
if (parent == NULL)
goto out_free;
dir = parent->d_inode;
/* Non-exclusive lock protects against concurrent lookup() calls */
spin_lock(&dir->i_lock);
if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
/* Deferred delete */
hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
spin_unlock(&dir->i_lock);
ret = 1;
goto out_dput;
}
spin_unlock(&dir->i_lock);
ret = nfs_do_call_unlink(parent, dir, data);
out_dput:
dput(parent);
out_free:
return ret;
}
void nfs_wait_on_sillyrename(struct dentry *dentry)
{
struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
}
void nfs_block_sillyrename(struct dentry *dentry)
{
struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
}
void nfs_unblock_sillyrename(struct dentry *dentry)
{
struct inode *dir = dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_unlinkdata *data;
atomic_inc(&nfsi->silly_count);
spin_lock(&dir->i_lock);
while (!hlist_empty(&nfsi->silly_list)) {
if (!atomic_inc_not_zero(&nfsi->silly_count))
break;
data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
hlist_del(&data->list);
spin_unlock(&dir->i_lock);
if (nfs_do_call_unlink(dentry, dir, data) == 0)
nfs_free_unlinkdata(data);
spin_lock(&dir->i_lock);
}
spin_unlock(&dir->i_lock);
}
/**
* nfs_async_unlink - asynchronous unlinking of a file
* @dir: parent directory of dentry
* @dentry: dentry to unlink
*/
static int
nfs_async_unlink(struct inode *dir, struct dentry *dentry)
{
struct nfs_unlinkdata *data;
int status = -ENOMEM;
void *devname_garbage = NULL;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
goto out;
data->cred = rpc_lookup_cred();
if (IS_ERR(data->cred)) {
status = PTR_ERR(data->cred);
goto out_free;
}
data->res.dir_attr = &data->dir_attr;
status = -EBUSY;
spin_lock(&dentry->d_lock);
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out_unlock;
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
devname_garbage = dentry->d_fsdata;
dentry->d_fsdata = data;
spin_unlock(&dentry->d_lock);
/*
* If we'd displaced old cached devname, free it. At that
* point dentry is definitely not a root, so we won't need
* that anymore.
*/
kfree(devname_garbage);
return 0;
out_unlock:
spin_unlock(&dentry->d_lock);
put_rpccred(data->cred);
out_free:
kfree(data);
out:
return status;
}
/**
* nfs_complete_unlink - Initialize completion of the sillydelete
* @dentry: dentry to delete
* @inode: inode
*
* Since we're most likely to be called by dentry_iput(), we
* only use the dentry to find the sillydelete. We then copy the name
* into the qstr.
*/
void
nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
{
struct nfs_unlinkdata *data = NULL;
spin_lock(&dentry->d_lock);
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
data = dentry->d_fsdata;
dentry->d_fsdata = NULL;
}
spin_unlock(&dentry->d_lock);
if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
nfs_free_unlinkdata(data);
}
/* Cancel a queued async unlink. Called when a sillyrename run fails. */
static void
nfs_cancel_async_unlink(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
struct nfs_unlinkdata *data = dentry->d_fsdata;
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
nfs_free_unlinkdata(data);
return;
}
spin_unlock(&dentry->d_lock);
}
/**
* nfs_async_rename_done - Sillyrename post-processing
* @task: rpc_task of the sillyrename
* @calldata: nfs_renamedata for the sillyrename
*
* Do the directory attribute updates and the d_move
*/
static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
{
struct nfs_renamedata *data = calldata;
struct inode *old_dir = data->old_dir;
struct inode *new_dir = data->new_dir;
struct dentry *old_dentry = data->old_dentry;
trace_nfs_sillyrename_rename(old_dir, old_dentry,
new_dir, data->new_dentry, task->tk_status);
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
rpc_restart_call_prepare(task);
return;
}
if (data->complete)
data->complete(task, data);
}
/**
* nfs_async_rename_release - Release the sillyrename data.
* @calldata: the struct nfs_renamedata to be released
*/
static void nfs_async_rename_release(void *calldata)
{
struct nfs_renamedata *data = calldata;
struct super_block *sb = data->old_dir->i_sb;
if (data->old_dentry->d_inode)
nfs_mark_for_revalidate(data->old_dentry->d_inode);
dput(data->old_dentry);
dput(data->new_dentry);
iput(data->old_dir);
iput(data->new_dir);
nfs_sb_deactive(sb);
put_rpccred(data->cred);
kfree(data);
}
static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_renamedata *data = calldata;
NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data);
}
static const struct rpc_call_ops nfs_rename_ops = {
.rpc_call_done = nfs_async_rename_done,
.rpc_release = nfs_async_rename_release,
.rpc_call_prepare = nfs_rename_prepare,
};
/**
* nfs_async_rename - perform an asynchronous rename operation
* @old_dir: directory that currently holds the dentry to be renamed
* @new_dir: target directory for the rename
* @old_dentry: original dentry to be renamed
* @new_dentry: dentry to which the old_dentry should be renamed
*
* It's expected that valid references to the dentries and inodes are held
*/
struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
struct dentry *old_dentry, struct dentry *new_dentry,
void (*complete)(struct rpc_task *, struct nfs_renamedata *))
{
struct nfs_renamedata *data;
struct rpc_message msg = { };
struct rpc_task_setup task_setup_data = {
.rpc_message = &msg,
.callback_ops = &nfs_rename_ops,
.workqueue = nfsiod_workqueue,
.rpc_client = NFS_CLIENT(old_dir),
.flags = RPC_TASK_ASYNC,
};
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
task_setup_data.callback_data = data;
data->cred = rpc_lookup_cred();
if (IS_ERR(data->cred)) {
struct rpc_task *task = ERR_CAST(data->cred);
kfree(data);
return task;
}
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
msg.rpc_cred = data->cred;
/* set up nfs_renamedata */
data->old_dir = old_dir;
ihold(old_dir);
data->new_dir = new_dir;
ihold(new_dir);
data->old_dentry = dget(old_dentry);
data->new_dentry = dget(new_dentry);
nfs_fattr_init(&data->old_fattr);
nfs_fattr_init(&data->new_fattr);
data->complete = complete;
/* set up nfs_renameargs */
data->args.old_dir = NFS_FH(old_dir);
data->args.old_name = &old_dentry->d_name;
data->args.new_dir = NFS_FH(new_dir);
data->args.new_name = &new_dentry->d_name;
/* set up nfs_renameres */
data->res.old_fattr = &data->old_fattr;
data->res.new_fattr = &data->new_fattr;
nfs_sb_active(old_dir->i_sb);
NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
return rpc_run_task(&task_setup_data);
}
/*
* Perform tasks needed when a sillyrename is done such as cancelling the
* queued async unlink if it failed.
*/
static void
nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
{
struct dentry *dentry = data->old_dentry;
if (task->tk_status != 0) {
nfs_cancel_async_unlink(dentry);
return;
}
/*
* vfs_unlink and the like do not issue this when a file is
* sillyrenamed, so do it here.
*/
fsnotify_nameremove(dentry, 0);
}
#define SILLYNAME_PREFIX ".nfs"
#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
#define SILLYNAME_COUNTER_LEN ((unsigned)sizeof(unsigned int) << 1)
#define SILLYNAME_LEN (SILLYNAME_PREFIX_LEN + \
SILLYNAME_FILEID_LEN + \
SILLYNAME_COUNTER_LEN)
/**
* nfs_sillyrename - Perform a silly-rename of a dentry
* @dir: inode of directory that contains dentry
* @dentry: dentry to be sillyrenamed
*
* NFSv2/3 is stateless and the server doesn't know when the client is
* holding a file open. To prevent application problems when a file is
* unlinked while it's still open, the client performs a "silly-rename".
* That is, it renames the file to a hidden file in the same directory,
* and only performs the unlink once the last reference to it is put.
*
* The final cleanup is done during dentry_iput.
*
* (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
* could take responsibility for keeping open files referenced. The server
* would also need to ensure that opened-but-deleted files were kept over
* reboots. However, we may not assume a server does so. (RFC 5661
* does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
* use to advertise that it does this; some day we may take advantage of
* it.))
*/
int
nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter;
unsigned char silly[SILLYNAME_LEN + 1];
unsigned long long fileid;
struct dentry *sdentry;
struct rpc_task *task;
int error = -EBUSY;
dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
dentry, d_count(dentry));
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
* We don't allow a dentry to be silly-renamed twice.
*/
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out;
fileid = NFS_FILEID(dentry->d_inode);
/* Return delegation in anticipation of the rename */
NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
sdentry = NULL;
do {
int slen;
dput(sdentry);
sillycounter++;
slen = scnprintf(silly, sizeof(silly),
SILLYNAME_PREFIX "%0*llx%0*x",
SILLYNAME_FILEID_LEN, fileid,
SILLYNAME_COUNTER_LEN, sillycounter);
dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
dentry, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
*/
if (IS_ERR(sdentry))
goto out;
} while (sdentry->d_inode != NULL); /* need negative lookup */
/* queue unlink first. Can't do this from rpc_release as it
* has to allocate memory
*/
error = nfs_async_unlink(dir, dentry);
if (error)
goto out_dput;
/* populate unlinkdata with the right dname */
error = nfs_copy_dname(sdentry,
(struct nfs_unlinkdata *)dentry->d_fsdata);
if (error) {
nfs_cancel_async_unlink(dentry);
goto out_dput;
}
/* run the rename task, undo unlink if it fails */
task = nfs_async_rename(dir, dir, dentry, sdentry,
nfs_complete_sillyrename);
if (IS_ERR(task)) {
error = -EBUSY;
nfs_cancel_async_unlink(dentry);
goto out_dput;
}
/* wait for the RPC task to complete, unless a SIGKILL intervenes */
error = rpc_wait_for_completion_task(task);
if (error == 0)
error = task->tk_status;
switch (error) {
case 0:
/* The rename succeeded */
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
d_move(dentry, sdentry);
break;
case -ERESTARTSYS:
/* The result of the rename is unknown. Play it safe by
* forcing a new lookup */
d_drop(dentry);
d_drop(sdentry);
}
rpc_put_task(task);
out_dput:
dput(sdentry);
out:
return error;
}

1950
fs/nfs/write.c Normal file

File diff suppressed because it is too large Load diff