diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 211dc2aed8e1..eb78109d666c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1061,7 +1061,7 @@ static const struct rpc_call_ops nfsd4_cb_ops = { int nfsd4_create_callback_queue(void) { - callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); + callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); if (!callback_wq) return -ENOMEM; return 0; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 42aace4fc4c8..596205d939a1 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -686,10 +686,6 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) return 0; } /* Fallthrough */ - case -NFS4ERR_NOMATCHING_LAYOUT: - trace_layout_recall_done(&ls->ls_stid.sc_stateid); - task->tk_status = 0; - return 1; default: /* * Unknown error or non-responding client, we'll need to fence. @@ -702,6 +698,10 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) else nfsd4_cb_layout_fail(ls); return -1; + case -NFS4ERR_NOMATCHING_LAYOUT: + trace_layout_recall_done(&ls->ls_stid.sc_stateid); + task->tk_status = 0; + return 1; } } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index abb09b580389..74a6e573e061 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -96,33 +96,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct dentry *dentry = cstate->current_fh.fh_dentry; - /* - * Check about attributes are supported by the NFSv4 server or not. - * According to spec, unsupported attributes return ERR_ATTRNOTSUPP. - */ - if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) || - (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) || - (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) + if (!nfsd_attrs_supported(cstate->minorversion, bmval)) return nfserr_attrnotsupp; - - /* - * Check FATTR4_WORD0_ACL can be supported - * in current environment or not. - */ - if (bmval[0] & FATTR4_WORD0_ACL) { - if (!IS_POSIXACL(d_inode(dentry))) - return nfserr_attrnotsupp; - } - - /* - * According to spec, read-only attributes return ERR_INVAL. - */ - if (writable) { - if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || - (bmval[2] & ~writable[2])) - return nfserr_inval; - } - + if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry))) + return nfserr_attrnotsupp; + if (writable && !bmval_is_subset(bmval, writable)) + return nfserr_inval; + if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) && + (bmval[1] & FATTR4_WORD1_MODE)) + return nfserr_inval; return nfs_ok; } @@ -695,9 +677,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); - getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); - getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); + getattr->ga_bmval[0] &= nfsd_suppattrs[cstate->minorversion][0]; + getattr->ga_bmval[1] &= nfsd_suppattrs[cstate->minorversion][1]; + getattr->ga_bmval[2] &= nfsd_suppattrs[cstate->minorversion][2]; getattr->ga_fhp = &cstate->current_fh; return nfs_ok; @@ -799,9 +781,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); - readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); - readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); + readdir->rd_bmval[0] &= nfsd_suppattrs[cstate->minorversion][0]; + readdir->rd_bmval[1] &= nfsd_suppattrs[cstate->minorversion][1]; + readdir->rd_bmval[2] &= nfsd_suppattrs[cstate->minorversion][2]; if ((cookie == 1) || (cookie == 2) || (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c2d2895a1ec1..79edde4577b2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,6 +33,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include @@ -57,6 +58,20 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR +u32 nfsd_suppattrs[3][3] = { + {NFSD4_SUPPORTED_ATTRS_WORD0, + NFSD4_SUPPORTED_ATTRS_WORD1, + NFSD4_SUPPORTED_ATTRS_WORD2}, + + {NFSD4_1_SUPPORTED_ATTRS_WORD0, + NFSD4_1_SUPPORTED_ATTRS_WORD1, + NFSD4_1_SUPPORTED_ATTRS_WORD2}, + + {NFSD4_1_SUPPORTED_ATTRS_WORD0, + NFSD4_1_SUPPORTED_ATTRS_WORD1, + NFSD4_2_SUPPORTED_ATTRS_WORD2}, +}; + /* * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing * directory in order to indicate to the client that a filesystem boundary is present @@ -285,7 +300,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, - struct xdr_netobj *label) + struct xdr_netobj *label, int *umask) { int expected_len, len = 0; u32 dummy32; @@ -296,6 +311,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if ((status = nfsd4_decode_bitmap(argp, bmval))) return status; + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 + || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 + || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) { + if (nfsd_attrs_supported(argp->minorversion, bmval)) + return nfserr_inval; + return nfserr_attrnotsupp; + } + READ_BUF(4); expected_len = be32_to_cpup(p++); @@ -435,12 +458,18 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_jukebox; } #endif - - if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 - || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 - || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) - READ_BUF(expected_len - len); - else if (len != expected_len) + if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { + if (!umask) + goto xdr_error; + READ_BUF(8); + len += 8; + dummy32 = be32_to_cpup(p++); + iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO); + dummy32 = be32_to_cpup(p++); + *umask = dummy32 & S_IRWXUGO; + iattr->ia_valid |= ATTR_MODE; + } + if (len != expected_len) goto xdr_error; DECODE_TAIL; @@ -634,7 +663,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl, &create->cr_label); + &create->cr_acl, &create->cr_label, + ¤t->fs->umask); if (status) goto out; @@ -879,13 +909,15 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: + current->fs->umask = 0; READ_BUF(4); open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label); + &open->op_iattr, &open->op_acl, &open->op_label, + ¤t->fs->umask); if (status) goto out; break; @@ -899,7 +931,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label); + &open->op_iattr, &open->op_acl, &open->op_label, + ¤t->fs->umask); if (status) goto out; break; @@ -1136,7 +1169,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl, &setattr->sa_label); + &setattr->sa_acl, &setattr->sa_label, NULL); } static __be32 @@ -2340,9 +2373,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); - BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); - BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); - BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); + BUG_ON(!nfsd_attrs_supported(minorversion, bmval)); if (exp->ex_fslocs.migrated) { status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); @@ -2409,29 +2440,27 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 word0 = nfsd_suppattrs0(minorversion); - u32 word1 = nfsd_suppattrs1(minorversion); - u32 word2 = nfsd_suppattrs2(minorversion); + u32 *supp = nfsd_suppattrs[minorversion]; if (!IS_POSIXACL(dentry->d_inode)) - word0 &= ~FATTR4_WORD0_ACL; + supp[0] &= ~FATTR4_WORD0_ACL; if (!contextsupport) - word2 &= ~FATTR4_WORD2_SECURITY_LABEL; - if (!word2) { + supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (!supp[2]) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(word0); - *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(supp[0]); + *p++ = cpu_to_be32(supp[1]); } else { p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(word0); - *p++ = cpu_to_be32(word1); - *p++ = cpu_to_be32(word2); + *p++ = cpu_to_be32(supp[0]); + *p++ = cpu_to_be32(supp[1]); + *p++ = cpu_to_be32(supp[2]); } } if (bmval0 & FATTR4_WORD0_TYPE) { diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 54cde9a5864e..d6b97b424ad1 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -174,8 +175,12 @@ int nfsd_reply_cache_init(void) goto out_nomem; drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); - if (!drc_hashtbl) - goto out_nomem; + if (!drc_hashtbl) { + drc_hashtbl = vzalloc(hashsize * sizeof(*drc_hashtbl)); + if (!drc_hashtbl) + goto out_nomem; + } + for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); spin_lock_init(&drc_hashtbl[i].cache_lock); @@ -204,7 +209,7 @@ void nfsd_reply_cache_shutdown(void) } } - kfree (drc_hashtbl); + kvfree(drc_hashtbl); drc_hashtbl = NULL; drc_hashsize = 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2857e46d5cc5..f3b2f34b10a3 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -217,7 +217,7 @@ static const struct file_operations pool_stats_operations = { .release = nfsd_pool_stats_release, }; -static struct file_operations reply_cache_stats_operations = { +static const struct file_operations reply_cache_stats_operations = { .open = nfsd_reply_cache_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9446849888d5..d74c8c44dc35 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -359,44 +359,46 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS) -static inline u32 nfsd_suppattrs0(u32 minorversion) +extern u32 nfsd_suppattrs[3][3]; + +static inline bool bmval_is_subset(u32 *bm1, u32 *bm2) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 - : NFSD4_SUPPORTED_ATTRS_WORD0; + return !((bm1[0] & ~bm2[0]) || + (bm1[1] & ~bm2[1]) || + (bm1[2] & ~bm2[2])); } -static inline u32 nfsd_suppattrs1(u32 minorversion) +static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 - : NFSD4_SUPPORTED_ATTRS_WORD1; -} - -static inline u32 nfsd_suppattrs2(u32 minorversion) -{ - switch (minorversion) { - default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; - case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; - case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; - } + return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]); } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ #define NFSD_WRITEONLY_ATTRS_WORD1 \ (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) -/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ +/* + * These are the only attrs allowed in CREATE/OPEN/SETATTR. Don't add + * a writeable attribute here without also adding code to parse it to + * nfsd4_decode_fattr(). + */ #define NFSD_WRITEABLE_ATTRS_WORD0 \ (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ + FATTR4_WORD2_SECURITY_LABEL #else -#define NFSD_WRITEABLE_ATTRS_WORD2 0 +#define MAYBE_FATTR4_WORD2_SECURITY_LABEL 0 #endif +#define NFSD_WRITEABLE_ATTRS_WORD2 \ + (FATTR4_WORD2_MODE_UMASK \ + | MAYBE_FATTR4_WORD2_SECURITY_LABEL) #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index a2b65fc56dd6..e6bfd96734c0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -661,8 +661,8 @@ nfsd(void *vrqstp) mutex_lock(&nfsd_mutex); /* At this point, the thread shares current->fs - * with the init process. We need to create files with a - * umask of 0 instead of init's umask. */ + * with the init process. We need to create files with the + * umask as defined by the client instead of init's umask. */ if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cc3ae16eac68..757fb963696c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -79,7 +79,6 @@ struct svc_rdma_op_ctxt { struct ib_cqe reg_cqe; struct ib_cqe inv_cqe; struct list_head dto_q; - enum ib_wc_status wc_status; u32 byte_len; u32 position; struct svcxprt_rdma *xprt; @@ -139,7 +138,7 @@ struct svcxprt_rdma { int sc_max_sge_rd; /* max sge for read target */ bool sc_snd_w_inv; /* OK to use Send With Invalidate */ - atomic_t sc_sq_count; /* Number of SQ WR on queue */ + atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ unsigned int sc_rq_depth; /* Depth of RQ */ u32 sc_max_requests; /* Forward credits */ @@ -148,7 +147,6 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; - atomic_t sc_dma_used; spinlock_t sc_ctxt_lock; struct list_head sc_ctxts; int sc_ctxt_used; @@ -200,7 +198,6 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, struct svc_rdma_op_ctxt *ctxt) { ctxt->mapped_sges++; - atomic_inc(&rdma->sc_dma_used); } /* svc_rdma_backchannel.c */ @@ -236,8 +233,6 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, struct svc_rdma_req_map *, bool); extern int svc_rdma_sendto(struct svc_rqst *); -extern struct rpcrdma_read_chunk * - svc_rdma_get_read_chunk(struct rpcrdma_msg *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, int); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 90115ceefd49..fb39284ec174 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -200,7 +200,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(hmac_md5)) goto out_free_md5; - req = ahash_request_alloc(md5, GFP_KERNEL); + req = ahash_request_alloc(md5, GFP_NOFS); if (!req) goto out_free_hmac_md5; @@ -230,7 +230,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, goto out; ahash_request_free(req); - req = ahash_request_alloc(hmac_md5, GFP_KERNEL); + req = ahash_request_alloc(hmac_md5, GFP_NOFS); if (!req) goto out_free_hmac_md5; @@ -299,7 +299,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_KERNEL); + req = ahash_request_alloc(tfm, GFP_NOFS); if (!req) goto out_free_ahash; @@ -397,7 +397,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, goto out_free_cksum; checksumlen = crypto_ahash_digestsize(tfm); - req = ahash_request_alloc(tfm, GFP_KERNEL); + req = ahash_request_alloc(tfm, GFP_NOFS); if (!req) goto out_free_ahash; @@ -963,7 +963,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, } desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); @@ -1030,7 +1030,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, } desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60595835317a..7bb2514aadd9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -451,8 +451,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) goto out_err_free_hmac; - desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate hash descriptor for '%s'\n", __func__, ctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 45662d7f0943..886e9d381771 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1548,7 +1548,7 @@ complete: ret = SVC_COMPLETE; goto out; drop: - ret = SVC_DROP; + ret = SVC_CLOSE; out: if (rsci) cache_put(&rsci->h, sn->rsc_cache); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7c8070ec93c8..75f290bddca1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1155,8 +1155,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) case SVC_DENIED: goto err_bad_auth; case SVC_CLOSE: - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - svc_close_xprt(rqstp->rq_xprt); + goto close; case SVC_DROP: goto dropit; case SVC_COMPLETE: @@ -1246,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) sendit: if (svc_authorise(rqstp)) - goto dropit; + goto close; return 1; /* Caller can now send it */ dropit: @@ -1254,11 +1253,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) dprintk("svc: svc_process dropit\n"); return 0; + close: + if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + svc_close_xprt(rqstp->rq_xprt); + dprintk("svc: svc_process close\n"); + return 0; + err_short_len: svc_printk(rqstp, "short len %Zd, dropping request\n", argv->iov_len); - - goto dropit; /* drop request */ + goto close; err_bad_rpc: serv->sv_stats->rpcbadfmt++; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 69841db1f533..e112da8005b5 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -124,8 +124,7 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister); #define DN_HASHMAX (1<tk_rqstp; - struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; size_t size = rqst->rq_callsize; - struct svcxprt_rdma *rdma; struct page *page; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (size > PAGE_SIZE) { WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", size); @@ -359,6 +355,7 @@ xprt_setup_rdma_bc(struct xprt_create *args) out_fail: xprt_rdma_free_addresses(xprt); args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); xprt_free(xprt); return ERR_PTR(-EINVAL); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ad1df979b3f0..57d35fbb1c28 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, frmr->sg); return -ENOMEM; } - atomic_inc(&xprt->sc_dma_used); n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { @@ -374,9 +373,7 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, u32 position, u32 byte_count, u32 page_offset, int page_no) { char *srcp, *destp; - int ret; - ret = 0; srcp = head->arg.head[0].iov_base + position; byte_count = head->arg.head[0].iov_len - position; if (byte_count > PAGE_SIZE) { @@ -415,6 +412,20 @@ done: return 1; } +/* Returns the address of the first read chunk or if no read chunk + * is present + */ +static struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == xdr_zero) + return NULL; + return ch; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, @@ -627,8 +638,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto defer; goto out; } - dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", - ctxt, rdma_xprt, rqstp, ctxt->wc_status); + dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n", + ctxt, rdma_xprt, rqstp); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f5a91edcd233..ad4d286a83c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, return dma_addr; } -/* Returns the address of the first read chunk or if no read chunk - * is present +/* Parse the RPC Call's transport header. */ -struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, + struct rpcrdma_write_array **write, + struct rpcrdma_write_array **reply) { - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + __be32 *p; - if (ch->rc_discrim == xdr_zero) - return NULL; - return ch; -} + p = (__be32 *)&rmsgp->rm_body.rm_chunks[0]; -/* Returns the address of the first read write array element or - * if no write array list is present - */ -static struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || - rmsgp->rm_body.rm_chunks[1] == xdr_zero) - return NULL; - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} + /* Read list */ + while (*p++ != xdr_zero) + p += 5; -/* Returns the address of the first reply array element or if no - * reply array is present - */ -static struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, - struct rpcrdma_write_array *wr_ary) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply chunk may occur with read list - * and/or write list. - */ - if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || - rmsgp->rm_body.rm_chunks[1] != xdr_zero) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim != xdr_zero) - rch++; - - /* The reply chunk follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - goto found_it; + /* Write list */ + if (*p != xdr_zero) { + *write = (struct rpcrdma_write_array *)p; + while (*p++ != xdr_zero) + p += 1 + be32_to_cpu(*p) * 4; + } else { + *write = NULL; + p++; } - if (wr_ary) { - int chunk = be32_to_cpu(wr_ary->wc_nchunks); - - rp_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[chunk].wc_target.rs_length; - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == xdr_zero) - return NULL; - return rp_ary; + /* Reply chunk */ + if (*p != xdr_zero) + *reply = (struct rpcrdma_write_array *)p; + else + *reply = NULL; } /* RPC-over-RDMA Version One private extension: Remote Invalidation. @@ -240,31 +199,22 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, { struct rpcrdma_read_chunk *rd_ary; struct rpcrdma_segment *arg_ch; - u32 inv_rkey; - inv_rkey = 0; - - rd_ary = svc_rdma_get_read_chunk(rdma_argp); - if (rd_ary) { - inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle); - goto out; - } + rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0]; + if (rd_ary->rc_discrim != xdr_zero) + return be32_to_cpu(rd_ary->rc_target.rs_handle); if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { arg_ch = &wr_ary->wc_array[0].wc_target; - inv_rkey = be32_to_cpu(arg_ch->rs_handle); - goto out; + return be32_to_cpu(arg_ch->rs_handle); } if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { arg_ch = &rp_ary->wc_array[0].wc_target; - inv_rkey = be32_to_cpu(arg_ch->rs_handle); - goto out; + return be32_to_cpu(arg_ch->rs_handle); } -out: - dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey); - return inv_rkey; + return 0; } /* Assumptions: @@ -622,8 +572,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) * places this at the start of page 0. */ rdma_argp = page_address(rqstp->rq_pages[0]); - wr_ary = svc_rdma_get_write_array(rdma_argp); - rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); + svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary); inv_rkey = 0; if (rdma->sc_snd_w_inv) @@ -636,7 +585,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err0; inline_bytes = rqstp->rq_res.len; - /* Create the RDMA response header */ + /* Create the RDMA response header. xprt->xpt_mutex, + * acquired in svc_send(), serializes RPC replies. The + * code path below that inserts the credit grant value + * into each transport header runs only inside this + * critical section. + */ ret = -ENOMEM; res_page = alloc_page(GFP_KERNEL); if (!res_page) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 1334de2715c2..ca2799af05a6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -41,6 +41,7 @@ */ #include +#include #include #include #include @@ -226,25 +227,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) struct svcxprt_rdma *xprt = ctxt->xprt; struct ib_device *device = xprt->sc_cm_id->device; u32 lkey = xprt->sc_pd->local_dma_lkey; - unsigned int i, count; + unsigned int i; - for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { + for (i = 0; i < ctxt->mapped_sges; i++) { /* * Unmap the DMA addr in the SGE if the lkey matches * the local_dma_lkey, otherwise, ignore it since it is * an FRMR lkey and will be unmapped later when the * last WR that uses it completes. */ - if (ctxt->sge[i].lkey == lkey) { - count++; + if (ctxt->sge[i].lkey == lkey) ib_dma_unmap_page(device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); - } } ctxt->mapped_sges = 0; - atomic_sub(count, &xprt->sc_dma_used); } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -398,7 +396,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); - ctxt->wc_status = wc->status; svc_rdma_unmap_dma(ctxt); if (wc->status != IB_WC_SUCCESS) @@ -436,7 +433,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt, goto err; out: - atomic_dec(&xprt->sc_sq_count); + atomic_inc(&xprt->sc_sq_avail); wake_up(&xprt->sc_send_wait); return; @@ -946,7 +943,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { ib_dma_unmap_sg(rdma->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); - atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); @@ -973,6 +969,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; struct ib_device *dev; + struct sockaddr *sap; unsigned int i; int ret = 0; @@ -1010,6 +1007,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; + atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; @@ -1052,18 +1050,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = newxprt->sc_sq_cq; qp_attr.recv_cq = newxprt->sc_rq_cq; - dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" - " cm_id->device=%p, sc_pd->device=%p\n" - " cap.max_send_wr = %d\n" - " cap.max_recv_wr = %d\n" - " cap.max_send_sge = %d\n" - " cap.max_recv_sge = %d\n", - newxprt->sc_cm_id, newxprt->sc_pd, - dev, newxprt->sc_pd->device, - qp_attr.cap.max_send_wr, - qp_attr.cap.max_recv_wr, - qp_attr.cap.max_send_sge, - qp_attr.cap.max_recv_sge); + dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n", + newxprt->sc_cm_id, newxprt->sc_pd); + dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n", + qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr); + dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n", + qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge); ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { @@ -1146,31 +1138,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } - dprintk("svcrdma: new connection %p accepted with the following " - "attributes:\n" - " local_ip : %pI4\n" - " local_port : %d\n" - " remote_ip : %pI4\n" - " remote_port : %d\n" - " max_sge : %d\n" - " max_sge_rd : %d\n" - " sq_depth : %d\n" - " max_requests : %d\n" - " ord : %d\n", - newxprt, - &((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.src_addr)->sin_addr.s_addr, - ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.src_addr)->sin_port), - &((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.dst_addr)->sin_addr.s_addr, - ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.dst_addr)->sin_port), - newxprt->sc_max_sge, - newxprt->sc_max_sge_rd, - newxprt->sc_sq_depth, - newxprt->sc_max_requests, - newxprt->sc_ord); + dprintk("svcrdma: new connection %p accepted:\n", newxprt); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); + dprintk(" max_sge : %d\n", newxprt->sc_max_sge); + dprintk(" max_sge_rd : %d\n", newxprt->sc_max_sge_rd); + dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" max_requests : %d\n", newxprt->sc_max_requests); + dprintk(" ord : %d\n", newxprt->sc_ord); return &newxprt->sc_xprt; @@ -1257,9 +1234,6 @@ static void __svc_rdma_free(struct work_struct *work) if (rdma->sc_ctxt_used != 0) pr_err("svcrdma: ctxt still in use? (%d)\n", rdma->sc_ctxt_used); - if (atomic_read(&rdma->sc_dma_used) != 0) - pr_err("svcrdma: dma still in use? (%d)\n", - atomic_read(&rdma->sc_dma_used)); /* Final put of backchannel client transport */ if (xprt->xpt_bc_xprt) { @@ -1339,15 +1313,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) /* If the SQ is full, wait until an SQ entry is available */ while (1) { - spin_lock_bh(&xprt->sc_lock); - if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { - spin_unlock_bh(&xprt->sc_lock); + if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) { atomic_inc(&rdma_stat_sq_starve); /* Wait until SQ WR available if SQ still full */ + atomic_add(wr_count, &xprt->sc_sq_avail); wait_event(xprt->sc_send_wait, - atomic_read(&xprt->sc_sq_count) < - xprt->sc_sq_depth); + atomic_read(&xprt->sc_sq_avail) > wr_count); if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; continue; @@ -1357,21 +1329,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) svc_xprt_get(&xprt->sc_xprt); /* Bump used SQ WR count and post */ - atomic_add(wr_count, &xprt->sc_sq_count); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); if (ret) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - atomic_sub(wr_count, &xprt->sc_sq_count); for (i = 0; i < wr_count; i ++) svc_xprt_put(&xprt->sc_xprt); - dprintk("svcrdma: failed to post SQ WR rc=%d, " - "sc_sq_count=%d, sc_sq_depth=%d\n", - ret, atomic_read(&xprt->sc_sq_count), - xprt->sc_sq_depth); - } - spin_unlock_bh(&xprt->sc_lock); - if (ret) + dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret); + dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n", + atomic_read(&xprt->sc_sq_avail), + xprt->sc_sq_depth); wake_up(&xprt->sc_send_wait); + } break; } return ret;