diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 0c4fab018791..a5ebc7dbb384 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, struct nfsd3_readres *resp) { int nfserr; + u32 max_blocksize = svc_max_payload(rqstp); dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", SVCFH_fmt(&argp->fh), @@ -172,8 +173,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, */ resp->count = argp->count; - if (NFSSVC_MAXBLKSIZE < resp->count) - resp->count = NFSSVC_MAXBLKSIZE; + if (max_blocksize < resp->count) + resp->count = max_blocksize; svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); @@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd3_fsinfores *resp) { int nfserr; + u32 max_blocksize = svc_max_payload(rqstp); dprintk("nfsd: FSINFO(3) %s\n", SVCFH_fmt(&argp->fh)); - resp->f_rtmax = NFSSVC_MAXBLKSIZE; - resp->f_rtpref = NFSSVC_MAXBLKSIZE; + resp->f_rtmax = max_blocksize; + resp->f_rtpref = max_blocksize; resp->f_rtmult = PAGE_SIZE; - resp->f_wtmax = NFSSVC_MAXBLKSIZE; - resp->f_wtpref = NFSSVC_MAXBLKSIZE; + resp->f_wtmax = max_blocksize; + resp->f_wtpref = max_blocksize; resp->f_wtmult = PAGE_SIZE; resp->f_dtpref = PAGE_SIZE; resp->f_maxfilesize = ~(u32) 0; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 4b9aefbcc93c..247d518248bf 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, { unsigned int len; int v,pn; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) @@ -337,8 +338,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, len = args->count = ntohl(*p++); - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > max_blocksize) + len = max_blocksize; /* set up the kvec */ v=0; @@ -358,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_writeargs *args) { unsigned int len, v, hdr; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) @@ -375,8 +377,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, rqstp->rq_vec[0].iov_base = (void*)p; rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > max_blocksize) + len = max_blocksize; v= 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -564,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_readdirargs *args) { int len, pn; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) return 0; @@ -572,7 +575,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : + len = (args->count > max_blocksize) ? max_blocksize : args->count; args->count = len; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df341956254e..4cfacc557b40 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1536,12 +1536,12 @@ out_acl: if (bmval0 & FATTR4_WORD0_MAXREAD) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64((u64) NFSSVC_MAXBLKSIZE); + WRITE64((u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64((u64) NFSSVC_MAXBLKSIZE); + WRITE64((u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { if ((buflen -= 4) < 0) @@ -2055,7 +2055,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, RESERVE_SPACE(8); /* eof flag and byte count */ - maxcount = NFSSVC_MAXBLKSIZE; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index ca4973150218..9ee1dab5d44a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -146,13 +146,13 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, * status, 17 words for fattr, and 1 word for the byte count. */ - if (NFSSVC_MAXBLKSIZE < argp->count) { + if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { printk(KERN_NOTICE "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), ntohs(rqstp->rq_addr.sin_port), argp->count); - argp->count = NFSSVC_MAXBLKSIZE; + argp->count = NFSSVC_MAXBLKSIZE_V2; } svc_reserve(rqstp, (19<<2) + argp->count + 4); @@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = { PROC(none, void, void, none, RC_NOCACHE, ST), PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), - PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), + PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), PROC(none, void, void, none, RC_NOCACHE, ST), PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index ab6745e78d16..1135c0d14557 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -254,8 +254,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > NFSSVC_MAXBLKSIZE_V2) + len = NFSSVC_MAXBLKSIZE_V2; /* set up somewhere to store response. * We take pages, put them on reslist and include in iovec @@ -288,8 +288,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, rqstp->rq_vec[0].iov_base = (void*)p; rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - (((void*)p) - rqstp->rq_arg.head[0].iov_base); - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > NFSSVC_MAXBLKSIZE_V2) + len = NFSSVC_MAXBLKSIZE_V2; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -458,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, { struct kstatfs *stat = &resp->stats; - *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ + *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ *p++ = htonl(stat->f_bsize); *p++ = htonl(stat->f_blocks); *p++ = htonl(stat->f_bfree); diff --git a/include/linux/nfsd/const.h b/include/linux/nfsd/const.h index b75bb1b38d09..adbddf007898 100644 --- a/include/linux/nfsd/const.h +++ b/include/linux/nfsd/const.h @@ -13,6 +13,7 @@ #include #include #include +#include /* * Maximum protocol version supported by knfsd @@ -23,6 +24,8 @@ * Maximum blocksize supported by daemon currently at 32K */ #define NFSSVC_MAXBLKSIZE (32*1024) +/* NFSv2 is limited by the protocol specification, see RFC 1094 */ +#define NFSSVC_MAXBLKSIZE_V2 (8*1024) #ifdef __KERNEL__ @@ -30,7 +33,17 @@ # define NFS_SUPER_MAGIC 0x6969 #endif -#define NFSD_BUFSIZE (1024 + NFSSVC_MAXBLKSIZE) +/* + * Largest number of bytes we need to allocate for an NFS + * call or reply. Used to control buffer sizes. We use + * the length of v3 WRITE, READDIR and READDIR replies + * which are an RPC header, up to 26 XDR units of reply + * data, and some page data. + * + * Note that accuracy here doesn't matter too much as the + * size is rounded up to a page size when allocating space. + */ +#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) #ifdef CONFIG_NFSD_V4 # define NFSSVC_XDRSIZE NFS4_SVC_XDRSIZE diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 862c0d8c8381..534cdc7be58d 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -20,9 +20,6 @@ /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 -/* Maximum size (in bytes) of an rpc credential or verifier */ -#define RPC_MAX_AUTH_SIZE (400) - /* Work around the lack of a VFS credential */ struct auth_cred { uid_t uid; diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 8d10d148834e..1e65f2dd80e5 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -11,6 +11,9 @@ #define RPC_VERSION 2 +/* size of an XDR encoding unit in bytes, i.e. 32bit */ +#define XDR_UNIT (4) + /* spec defines authentication flavor as an unsigned 32 bit integer */ typedef u32 rpc_authflavor_t; @@ -34,6 +37,9 @@ enum rpc_auth_flavors { RPC_AUTH_GSS_SPKMP = 390011, }; +/* Maximum size (in bytes) of an rpc credential or verifier */ +#define RPC_MAX_AUTH_SIZE (400) + enum rpc_msg_type { RPC_CALL = 0, RPC_REPLY = 1 @@ -101,5 +107,39 @@ typedef __be32 rpc_fraghdr; #define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) #define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) +/* + * RPC call and reply header size as number of 32bit words (verifier + * size computed separately, see below) + */ +#define RPC_CALLHDRSIZE (6) +#define RPC_REPHDRSIZE (4) + + +/* + * Maximum RPC header size, including authentication, + * as number of 32bit words (see RFCs 1831, 1832). + * + * xid 1 xdr unit = 4 bytes + * mtype 1 + * rpc_version 1 + * program 1 + * prog_version 1 + * procedure 1 + * cred { + * flavor 1 + * length 1 + * body 100 xdr units = 400 bytes + * } + * verf { + * flavor 1 + * length 1 + * body 100 xdr units = 400 bytes + * } + * TOTAL 210 xdr units = 840 bytes + */ +#define RPC_MAX_HEADER_WITH_AUTH \ + (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) + + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cb0ed9beb227..74e52c245da4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -95,8 +96,28 @@ static inline void svc_get(struct svc_serv *serv) * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is * willing to return in a single READ operation. + * + * These happen to all be powers of 2, which is not strictly + * necessary but helps enforce the real limitation, which is + * that they should be multiples of PAGE_CACHE_SIZE. + * + * For UDP transports, a block plus NFS,RPC, and UDP headers + * has to fit into the IP datagram limit of 64K. The largest + * feasible number for all known page sizes is probably 48K, + * but we choose 32K here. This is the same as the historical + * Linux limit; someone who cares more about NFS/UDP performance + * can test a larger number. + * + * For TCP transports we have more freedom. A size of 1MB is + * chosen to match the client limit. Other OSes are known to + * have larger limits, but those numbers are probably beyond + * the point of diminishing returns. */ -#define RPCSVC_MAXPAYLOAD (64*1024u) +#define RPCSVC_MAXPAYLOAD (1*1024*1024u) +#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD +#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) + +extern u32 svc_max_payload(const struct svc_rqst *rqstp); /* * RPC Requsts and replies are stored in one or more pages. diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6cf626580752..60394fbc4c70 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -15,6 +15,7 @@ #include #include #include +#include extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -23,13 +24,6 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -/* - * RPC call and reply header size as number of 32bit words (verifier - * size computed separately) - */ -#define RPC_CALLHDRSIZE 6 -#define RPC_REPHDRSIZE 4 - /* * Parameters for choosing a free port */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f4a509a925b5..b252401c8601 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -919,3 +919,18 @@ err_bad: svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } + +/* + * Return (transport-specific) limit on the rpc payload. + */ +u32 svc_max_payload(const struct svc_rqst *rqstp) +{ + int max = RPCSVC_MAXPAYLOAD_TCP; + + if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) + max = RPCSVC_MAXPAYLOAD_UDP; + if (rqstp->rq_server->sv_bufsz < max) + max = rqstp->rq_server->sv_bufsz; + return max; +} +EXPORT_SYMBOL_GPL(svc_max_payload);