根据对open接口的分析,接下来将调用 read(file_handle, buf_addr, buff_size);
由系统调用(syscall)进入 file_operations 的 .read
fs/nfs/file.c
/*
 * File operation table for regular NFS files.
 *
 * The synchronous .read/.write slots point at the generic do_sync_*
 * helpers, while the .aio_read/.aio_write slots point at the
 * NFS-specific handlers (nfs_file_read / nfs_file_write).
 */
const struct file_operations nfs_file_operations = {
	/* open / close life cycle */
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,

	/* positioning and data transfer */
	.llseek		= nfs_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= nfs_file_read,
	.aio_write	= nfs_file_write,
	.splice_read	= nfs_file_splice_read,
	.splice_write	= nfs_file_splice_write,

	/* memory mapping and persistence */
	.mmap		= nfs_file_mmap,
	.fsync		= nfs_file_fsync,

	/* locking, leases and flag validation */
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.setlease	= nfs_setlease,
	.check_flags	= nfs_check_flags,
};
函数:nfs_file_read
/**
 * nfs_file_read - .aio_read handler for NFS files
 * @iocb: I/O control block; the target file is iocb->ki_filp
 * @iov: vector of user buffers to read into
 * @nr_segs: number of entries in @iov
 * @pos: byte offset in the file at which the read starts
 *
 * Files opened with O_DIRECT are handed to nfs_file_direct_read().
 * For all other files the mapping is revalidated first and, if that
 * succeeds, the read is performed by generic_file_aio_read().
 *
 * Return: the result of nfs_file_direct_read() for O_DIRECT files;
 * otherwise the non-zero result of nfs_revalidate_mapping(), or the
 * result of generic_file_aio_read().
 */
static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
			     unsigned long nr_segs, loff_t pos)
{
	struct dentry *dentry = iocb->ki_filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	/*
	 * Key statement: take the direct I/O path when the file was
	 * opened with O_DIRECT.  Annotator's note: the iovec entries
	 * refer to user-space addresses directly:
	 *
	 *	struct iovec {
	 *		void __user *iov_base;
	 *		__kernel_size_t iov_len;
	 *	};
	 */
	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);

	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long) pos);

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	/* The byte count is accounted even when revalidation failed. */
	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
	if (!result)
		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
	return result;
}
函数:nfs_file_direct_read
/**
 * nfs_file_direct_read - file direct read operation for NFS files
 * @iocb: target I/O control block
 * @iov: vector of user buffers into which to read data
 * @nr_segs: size of iov vector
 * @pos: byte offset in file where reading starts
 *
 * We use this function for direct reads instead of calling
 * generic_file_aio_read() in order to avoid gfar's check to see if
 * the request starts before the end of the file.  For that check
 * to work, we must generate a GETATTR before each direct read, and
 * even then there is a window between the GETATTR and the subsequent
 * READ where the file size could change.  Our preference is simply
 * to do all reads the application wants, and the server will take
 * care of managing the end of file boundary.
 *
 * This function also eliminates unnecessarily updating the file's
 * atime locally, as the NFS server sets the file's atime, and this
 * client must read the updated atime from the server back into its
 * cache.
 *
 * Return: 0 when the iovec covers zero bytes; the non-zero result of
 * nfs_sync_mapping() if flushing fails; otherwise the result of
 * nfs_direct_read().  On a positive result iocb->ki_pos is advanced
 * to @pos plus the number of bytes read.
 */
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
			     unsigned long nr_segs, loff_t pos)
{
	ssize_t retval = 0;
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	size_t count;

	/*
	 * Total number of bytes covered by the iovec.
	 *
	 * NOTE that it is not safe to use iov_length() until all the
	 * iovec's segment lengths have been validated, because the
	 * individual lengths can overflow a size_t when added together.
	 */
	count = iov_length(iov, nr_segs);

	/* Account the requested bytes under NFSIOS_DIRECTREADBYTES. */
	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);

	dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name,
		count, (long long) pos);

	if (!count)
		goto out;

	/*
	 * Flush the mapping before reading directly.  Annotator's notes
	 * on what nfs_sync_mapping() involves (not all of it is visible
	 * in this file):
	 *
	 * 1. Unmap the pages of the address_space via
	 *    unmap_mapping_range(); the range runs from the start
	 *    address to the end offset, with an overflow check on the
	 *    end address.
	 *
	 * 2. Write the data back, i.e. write out the pages.  Call chain:
	 *      filemap_write_and_wait
	 *        -> filemap_fdatawrite
	 *          -> __filemap_fdatawrite
	 *            -> __filemap_fdatawrite_range
	 *              -> mapping_cap_writeback_dirty or do_writepages
	 *    With an immediate write-back policy do_writepages is called
	 *    directly; otherwise the chain returns without calling it.
	 *    The write-back policy is normally
	 *    address_space.a_ops->writepages; when that is not defined
	 *    the default mm/page-writeback.c:generic_writepages is used,
	 *    which eventually uses
	 *    address_space.backing_dev_info.congested_fn.
	 */
	retval = nfs_sync_mapping(mapping);
	if (retval)
		goto out;

	/* Perform the direct read into the caller's user-space buffers. */
	retval = nfs_direct_read(iocb, iov, nr_segs, pos);
	if (retval > 0)
		iocb->ki_pos = pos + retval;
out:
	return retval;
}
函数:nfs_sync_mapping
/**
 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
 * @mapping: address space to flush
 *
 * Does nothing when the mapping holds no pages.  Otherwise the whole
 * mapping is unmapped, written back and waited on with
 * filemap_write_and_wait(), and finally nfs_wb_all() is run on the
 * owning inode.
 *
 * Return: 0 when there are no pages; the non-zero result of
 * filemap_write_and_wait() if it fails; otherwise the result of
 * nfs_wb_all().
 */
int nfs_sync_mapping(struct address_space *mapping)
{
	int err;

	/* Nothing cached, nothing to flush. */
	if (mapping->nrpages == 0)
		return 0;

	unmap_mapping_range(mapping, 0, 0, 0);

	err = filemap_write_and_wait(mapping);
	if (err != 0)
		return err;

	return nfs_wb_all(mapping->host);
}
函数:nfs_direct_read
/*
 * nfs_direct_read - set up, schedule and wait for a direct read request
 * @iocb: I/O control block; the target file is iocb->ki_filp
 * @iov: vector of user buffers to read into
 * @nr_segs: number of entries in @iov
 * @pos: byte offset in the file at which the read starts
 *
 * Allocates an nfs_direct_req, attaches the inode and a reference to
 * the file's NFS open context, schedules the reads for every iovec
 * segment and, if scheduling reported success (0), waits for the
 * request via nfs_direct_wait().
 *
 * Returns -ENOMEM if the request cannot be allocated, the non-zero
 * result of nfs_direct_read_schedule_iovec() if scheduling fails,
 * otherwise the result of nfs_direct_wait().
 */
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
			       unsigned long nr_segs, loff_t pos)
{
	ssize_t result = 0;
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct nfs_direct_req *dreq;

	dreq = nfs_direct_req_alloc();
	if (!dreq)
		return -ENOMEM;

	dreq->inode = inode;
	/*
	 * Take a reference on the file's NFS open context (annotator's
	 * note: this is what file->private_data holds) and store it in
	 * the request; nfs_direct_read_schedule_segment() reads it back
	 * through dreq->ctx.
	 */
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	/* Record the iocb on the request only for non-synchronous kiocbs. */
	if (!is_sync_kiocb(iocb))
		dreq->iocb = iocb;

	/* Core statement: schedule the reads for all iovec segments. */
	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
	if (!result)
		result = nfs_direct_wait(dreq);
	nfs_direct_req_release(dreq);

	return result;
}
函数:nfs_direct_read_schedule_iovec
/*
 * nfs_direct_read_schedule_iovec - schedule reads for each iovec segment
 * @dreq: direct request the reads are issued on behalf of
 * @iov: vector of user buffers
 * @nr_segs: number of entries in @iov
 * @pos: byte offset in the file for the first segment
 *
 * Segments are handed, in order, to nfs_direct_read_schedule_segment().
 * Processing stops at the first segment that fails or that schedules
 * fewer bytes than its length.  A request reference is held across the
 * loop; if dropping it with put_dreq() reports true, the request is
 * completed here via nfs_direct_complete().
 *
 * Returns 0 if at least one byte was scheduled; otherwise the negative
 * value left by the last segment call (or the initial -EINVAL when no
 * segment was processed), or -EIO if that value is not negative.
 */
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
					      const struct iovec *iov,
					      unsigned long nr_segs,
					      loff_t pos)
{
	ssize_t rc = -EINVAL;
	size_t scheduled = 0;
	unsigned long idx = 0;

	get_dreq(dreq);

	while (idx < nr_segs) {
		const struct iovec *cur = iov + idx;

		/*
		 * Core dispatch step.  Annotator's note: the segment
		 * helper prepares the data descriptors, sets
		 * task_setup_data.callback_data = data and runs the
		 * RPC task.
		 */
		rc = nfs_direct_read_schedule_segment(dreq, cur, pos);
		if (rc < 0)
			break;

		scheduled += rc;
		/* A short segment ends the scheduling loop. */
		if ((size_t)rc < cur->iov_len)
			break;

		pos += cur->iov_len;
		idx++;
	}

	if (put_dreq(dreq))
		nfs_direct_complete(dreq);

	if (scheduled != 0)
		return 0;

	return rc < 0 ? rc : -EIO;
}
函数:nfs_direct_read_schedule_segment
/*
* For each rsize'd chunk of the user's buffer, dispatch an NFS READ
* operation. If nfs_readdata_alloc() or get_user_pages() fails,
* bail and stop sending more reads. Read length accounting is
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
const struct iovec *iov,
loff_t pos)
{
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->path.dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_cred = ctx->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = NFS_CLIENT(inode),
.rpc_message = &msg,
.callback_ops = &nfs_read_direct_ops,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
unsigned int pgbase;
int result;
ssize_t started = 0;
do {
struct nfs_read_data *data;
size_t bytes;
pgbase = user_addr & ~PAGE_MASK;
bytes = min(rsize,count);
result = -ENOMEM;
data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
if (unlikely(!data))
break;
down_read(¤t->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
data->npages, 1, 0, data->pagevec, NULL);
up_read(¤t->mm->mmap_sem);
if (result < 0) {
nfs_readdata_free(data);
break;
}
if ((unsigned)result < data->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
nfs_direct_release_pages(data->pagevec, result);
nfs_readdata_free(data);
break;
}
bytes -= pgbase;
data->npages = result;
}
get_dreq(dreq);
data->req = (struct nfs_page *) dreq;
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
task_setup_data.task = &data->task;
task_setup_data.callback_data = data;
NFS_PROTO(inode)->read_setup(data, &msg);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
break;
rpc_put_task(task);
dprintk("NFS: %5u initiated direct read call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
started += bytes;
user_addr += bytes;
pos += bytes;
/* FIXME: Remove this unnecessary math from final patch */
pgbase += bytes;
pgbase &= ~PAGE_MASK;
BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
count -= bytes;
} while (count != 0);
if (started)
return started;
return result < 0 ? (ssize_t) result : -EFAULT;