From 6d81ed1ec22dbe96b85a5eb6422b2ab0556f7cbc Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 16 Jun 2014 15:35:24 +0100 Subject: [PATCH 01/35] cifs: replace code with free_rsp_buf() The functionality provided by free_rsp_buf() is duplicated in a number of places. Replace these instances with a call to free_rsp_buf(). Signed-off-by: Sachin Prabhu Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 1 + fs/cifs/cifssmb.c | 20 ++++---------------- fs/cifs/misc.c | 9 +++++++++ fs/cifs/smb2pdu.c | 10 ---------- 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ca7980a1e303..de49d7a37b00 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,6 +36,7 @@ extern struct smb_hdr *cifs_buf_get(void); extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); +extern void free_rsp_buf(int, void *); extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, struct kvec *iov); extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6ce4e0954b98..b7e5b6508caa 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1734,10 +1734,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ if (*buf) { - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); } else if (resp_buf_type != CIFS_NO_BUFFER) { /* return buffer to caller to free */ *buf = iov[0].iov_base; @@ -2203,10 +2200,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, } /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -2451,10 +2445,7 @@ plk_err_exit: if (pSMB) cifs_small_buf_release(pSMB); - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -3838,10 +3829,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, } } qsec_out: - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(buf_type, iov[0].iov_base); /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ return rc; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 3b0c62e622da..64997a04ab59 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -226,6 +226,15 @@ cifs_small_buf_release(void *buf_to_free) return; } +void +free_rsp_buf(int resp_buftype, void *rsp) +{ + if (resp_buftype == CIFS_SMALL_BUFFER) + cifs_small_buf_release(rsp); + else if (resp_buftype == CIFS_LARGE_BUFFER) + cifs_buf_release(rsp); +} + /* NB: MID can not be set if treeCon not passed in, in that case it is responsbility of caller to set the mid */ void diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b0b260dbb19d..0158104df745 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -309,16 +309,6 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; } -static void -free_rsp_buf(int resp_buftype, void *rsp) -{ - if (resp_buftype == CIFS_SMALL_BUFFER) - cifs_small_buf_release(rsp); - else if (resp_buftype == CIFS_LARGE_BUFFER) - cifs_buf_release(rsp); -} - - /* * * SMB2 Worker functions follow: From 80a0e63751b4d872e8ebe0f14f89fdd3f5c8989b Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 16 Jun 2014 15:35:25 +0100 Subject: [PATCH 02/35] cifs: Split lanman auth from CIFS_SessSetup() In preparation for splitting CIFS_SessSetup() into smaller more manageable chunks, we first add helper functions. We then proceed to split out lanman auth out of CIFS_SessSetup() Signed-off-by: Sachin Prabhu Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/sess.c | 303 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 258 insertions(+), 45 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index e87387dbf39f..36d0b908f8b4 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -520,6 +520,240 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) } } +struct sess_data { + unsigned int xid; + struct cifs_ses *ses; + struct nls_table *nls_cp; + void (*func)(struct sess_data *); + int result; + + /* we will send the SMB in three pieces: + * a fixed length beginning part, an optional + * SPNEGO blob (which can be zero length), and a + * last part which will include the strings + * and rest of bcc area. This allows us to avoid + * a large buffer 17K allocation + */ + int buf0_type; + struct kvec iov[3]; +}; + +static int +sess_alloc_buffer(struct sess_data *sess_data, int wct) +{ + int rc; + struct cifs_ses *ses = sess_data->ses; + struct smb_hdr *smb_buf; + + rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, + (void **)&smb_buf); + + if (rc) + return rc; + + sess_data->iov[0].iov_base = (char *)smb_buf; + sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; + /* + * This variable will be used to clear the buffer + * allocated above in case of any error in the calling function. + */ + sess_data->buf0_type = CIFS_SMALL_BUFFER; + + /* 2000 big enough to fit max user, domain, NOS name etc. */ + sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL); + if (!sess_data->iov[2].iov_base) { + rc = -ENOMEM; + goto out_free_smb_buf; + } + + return 0; + +out_free_smb_buf: + kfree(smb_buf); + sess_data->iov[0].iov_base = NULL; + sess_data->iov[0].iov_len = 0; + sess_data->buf0_type = CIFS_NO_BUFFER; + return rc; +} + +static void +sess_free_buffer(struct sess_data *sess_data) +{ + + free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base); + sess_data->buf0_type = CIFS_NO_BUFFER; + kfree(sess_data->iov[2].iov_base); +} + +static int +sess_establish_session(struct sess_data *sess_data) +{ + struct cifs_ses *ses = sess_data->ses; + + mutex_lock(&ses->server->srv_mutex); + if (!ses->server->session_estab) { + if (ses->server->sign) { + ses->server->session_key.response = + kmemdup(ses->auth_key.response, + ses->auth_key.len, GFP_KERNEL); + if (!ses->server->session_key.response) { + mutex_unlock(&ses->server->srv_mutex); + return -ENOMEM; + } + ses->server->session_key.len = + ses->auth_key.len; + } + ses->server->sequence_number = 0x2; + ses->server->session_estab = true; + } + mutex_unlock(&ses->server->srv_mutex); + + cifs_dbg(FYI, "CIFS session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + + return 0; +} + +static int +sess_sendreceive(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base; + __u16 count; + + count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len; + smb_buf->smb_buf_length = + cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count); + put_bcc(count, smb_buf); + + rc = SendReceive2(sess_data->xid, sess_data->ses, + sess_data->iov, 3 /* num_iovecs */, + &sess_data->buf0_type, + CIFS_LOG_ERROR); + + return rc; +} + +/* + * LANMAN and plaintext are less secure and off by default. + * So we make this explicitly be turned on in kconfig (in the + * build) and turned on at runtime (changed from the default) + * in proc/fs/cifs or via mount parm. Unfortunately this is + * needed for old Win (e.g. Win95), some obscure NAS and OS/2 + */ +#ifdef CONFIG_CIFS_WEAK_PW_HASH +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + char lnm_session_key[CIFS_AUTH_RESP_SIZE]; + __u32 capabilities; + __u16 bytes_remaining; + + /* lanman 2 style sessionsetup */ + /* wct = 10 */ + rc = sess_alloc_buffer(sess_data, 10); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; + + /* no capabilities flags in old lanman negotiation */ + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); + + /* Calculate hash with password and copy into bcc_ptr. + * Encryption Key (stored as in cryptkey) gets used if the + * security mode bit in Negottiate Protocol response states + * to use challenge/response method (i.e. Password bit is 1). + */ + rc = calc_lanman_hash(ses->password, ses->server->cryptkey, + ses->server->sec_mode & SECMODE_PW_ENCRYPT ? + true : false, lnm_session_key); + + memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + + /* + * can not sign if LANMAN negotiated so no need + * to calculate signing key? but what if server + * changed to do higher than lanman dialect and + * we reconnected would we ever calc signing_key? + */ + + cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); + /* Unicode not allowed for LANMAN dialects */ + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + /* lanman response has a word count of 3 */ + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); +} + +#else + +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + sess_data->result = -EOPNOTSUPP; + sess_data->func = NULL; +} +#endif + int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -540,12 +774,21 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ u16 blob_len; char *ntlmsspblob = NULL; + struct sess_data *sess_data; if (ses == NULL) { WARN(1, "%s: ses == NULL!", __func__); return -EINVAL; } + sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL); + if (!sess_data) + return -ENOMEM; + sess_data->xid = xid; + sess_data->ses = ses; + sess_data->buf0_type = CIFS_NO_BUFFER; + sess_data->nls_cp = (struct nls_table *) nls_cp; + type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { @@ -554,6 +797,14 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, return -EINVAL; } + switch (type) { + case LANMAN: + sess_auth_lanman(sess_data); + goto out; + default: + cifs_dbg(FYI, "Continuing with CIFS_SessSetup\n"); + } + if (type == RawNTLMSSP) { /* if memory allocation is successful, caller of this function * frees it. @@ -569,17 +820,7 @@ ssetup_ntlmssp_authenticate: if (phase == NtLmChallenge) phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ - if (type == LANMAN) { -#ifndef CONFIG_CIFS_WEAK_PW_HASH - /* LANMAN and plaintext are less secure and off by default. - So we make this explicitly be turned on in kconfig (in the - build) and turned on at runtime (changed from the default) - in proc/fs/cifs or via mount parm. Unfortunately this is - needed for old Win (e.g. Win95), some obscure NAS and OS/2 */ - return -EOPNOTSUPP; -#endif - wct = 10; /* lanman 2 style sessionsetup */ - } else if ((type == NTLM) || (type == NTLMv2)) { + if ((type == NTLM) || (type == NTLMv2)) { /* For NTLMv2 failures eventually may need to retry NTLM */ wct = 13; /* old style NTLM sessionsetup */ } else /* same size: negotiate or auth, NTLMSSP or extended security */ @@ -618,39 +859,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_base = NULL; iov[1].iov_len = 0; - if (type == LANMAN) { -#ifdef CONFIG_CIFS_WEAK_PW_HASH - char lnm_session_key[CIFS_AUTH_RESP_SIZE]; - - pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; - - /* no capabilities flags in old lanman negotiation */ - - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); - - /* Calculate hash with password and copy into bcc_ptr. - * Encryption Key (stored as in cryptkey) gets used if the - * security mode bit in Negottiate Protocol response states - * to use challenge/response method (i.e. Password bit is 1). - */ - - rc = calc_lanman_hash(ses->password, ses->server->cryptkey, - ses->server->sec_mode & SECMODE_PW_ENCRYPT ? - true : false, lnm_session_key); - - memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - - /* can not sign if LANMAN negotiated so no need - to calculate signing key? but what if server - changed to do higher than lanman dialect and - we reconnected would we ever calc signing_key? */ - - cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); - /* Unicode not allowed for LANMAN dialects */ - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); -#endif - } else if (type == NTLM) { + if (type == NTLM) { pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); pSMB->req_no_secext.CaseInsensitivePasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); @@ -889,7 +1098,6 @@ ssetup_ntlmssp_authenticate: } if (phase == NtLmChallenge) { rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); - /* now goto beginning for ntlmssp authenticate phase */ if (rc) goto ssetup_exit; } @@ -962,4 +1170,9 @@ keycp_exit: kfree(ses->ntlmssp); return rc; + +out: + rc = sess_data->result; + kfree(sess_data); + return rc; } From 583cf7afc743af9624e85aab472e8bdcd0ba8b3f Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 16 Jun 2014 15:35:26 +0100 Subject: [PATCH 03/35] cifs: Split ntlm and ntlmv2 authentication methods off CIFS_SessSetup() Signed-off-by: Sachin Prabhu Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/sess.c | 318 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 233 insertions(+), 85 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 36d0b908f8b4..4f481c133f6e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -754,6 +754,216 @@ sess_auth_lanman(struct sess_data *sess_data) } #endif +static void +sess_auth_ntlm(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + + /* old style NTLM sessionsetup */ + /* wct = 13 */ + rc = sess_alloc_buffer(sess_data, 13); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); + pSMB->req_no_secext.CaseInsensitivePasswordLength = + cpu_to_le16(CIFS_AUTH_RESP_SIZE); + pSMB->req_no_secext.CaseSensitivePasswordLength = + cpu_to_le16(CIFS_AUTH_RESP_SIZE); + + /* calculate ntlm response and session key */ + rc = setup_ntlm_response(ses, sess_data->nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLM authentication\n", + rc); + goto out; + } + + /* copy ntlm response */ + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + + if (ses->capabilities & CAP_UNICODE) { + /* unicode strings must be word aligned */ + if (sess_data->iov[0].iov_len % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } else { + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } + + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + +static void +sess_auth_ntlmv2(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + + /* old style NTLM sessionsetup */ + /* wct = 13 */ + rc = sess_alloc_buffer(sess_data, 13); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); + + /* LM2 password would be here if we supported it */ + pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; + + /* calculate nlmv2 response and session key */ + rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc); + goto out; + } + + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ses->auth_key.len - CIFS_SESS_KEY_SIZE); + bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; + + /* set case sensitive password length after tilen may get + * assigned, tilen is 0 otherwise. + */ + pSMB->req_no_secext.CaseSensitivePasswordLength = + cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + + if (ses->capabilities & CAP_UNICODE) { + if (sess_data->iov[0].iov_len % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } else { + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } + + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + + int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -801,6 +1011,12 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, case LANMAN: sess_auth_lanman(sess_data); goto out; + case NTLM: + sess_auth_ntlm(sess_data); + goto out; + case NTLMv2: + sess_auth_ntlmv2(sess_data); + goto out; default: cifs_dbg(FYI, "Continuing with CIFS_SessSetup\n"); } @@ -820,11 +1036,8 @@ ssetup_ntlmssp_authenticate: if (phase == NtLmChallenge) phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ - if ((type == NTLM) || (type == NTLMv2)) { - /* For NTLMv2 failures eventually may need to retry NTLM */ - wct = 13; /* old style NTLM sessionsetup */ - } else /* same size: negotiate or auth, NTLMSSP or extended security */ - wct = 12; + /* same size: negotiate or auth, NTLMSSP or extended security */ + wct = 12; rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, (void **)&smb_buf); @@ -859,70 +1072,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_base = NULL; iov[1].iov_len = 0; - if (type == NTLM) { - pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - pSMB->req_no_secext.CaseInsensitivePasswordLength = - cpu_to_le16(CIFS_AUTH_RESP_SIZE); - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(CIFS_AUTH_RESP_SIZE); - - /* calculate ntlm response and session key */ - rc = setup_ntlm_response(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLM authentication\n", - rc); - goto ssetup_exit; - } - - /* copy ntlm response */ - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - - if (ses->capabilities & CAP_UNICODE) { - /* unicode strings must be word aligned */ - if (iov[0].iov_len % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == NTLMv2) { - pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - - /* LM2 password would be here if we supported it */ - pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; - - /* calculate nlmv2 response and session key */ - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", - rc); - goto ssetup_exit; - } - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - ses->auth_key.len - CIFS_SESS_KEY_SIZE); - bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; - - /* set case sensitive password length after tilen may get - * assigned, tilen is 0 otherwise. - */ - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); - - if (ses->capabilities & CAP_UNICODE) { - if (iov[0].iov_len % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == Kerberos) { + if (type == Kerberos) { #ifdef CONFIG_CIFS_UPCALL struct cifs_spnego_msg *msg; @@ -1073,7 +1223,7 @@ ssetup_ntlmssp_authenticate: if (rc) goto ssetup_exit; - if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { + if (smb_buf->WordCount != 4) { rc = -EIO; cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto ssetup_exit; @@ -1088,22 +1238,20 @@ ssetup_ntlmssp_authenticate: bytes_remaining = get_bcc(smb_buf); bcc_ptr = pByteArea(smb_buf); - if (smb_buf->WordCount == 4) { - blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); - if (blob_len > bytes_remaining) { - cifs_dbg(VFS, "bad security blob length %d\n", - blob_len); - rc = -EINVAL; - goto ssetup_exit; - } - if (phase == NtLmChallenge) { - rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); - if (rc) - goto ssetup_exit; - } - bcc_ptr += blob_len; - bytes_remaining -= blob_len; + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto ssetup_exit; } + if (phase == NtLmChallenge) { + rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); + if (rc) + goto ssetup_exit; + } + bcc_ptr += blob_len; + bytes_remaining -= blob_len; /* BB check if Unicode and decode strings */ if (bytes_remaining == 0) { From ee03c646dd70dafb28a894119f6e8f0033c974dc Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 16 Jun 2014 15:35:27 +0100 Subject: [PATCH 04/35] cifs: Split Kerberos authentication off CIFS_SessSetup() Signed-off-by: Sachin Prabhu Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/sess.c | 215 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 153 insertions(+), 62 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4f481c133f6e..b413fae993aa 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -963,6 +963,155 @@ out: ses->auth_key.response = NULL; } +#ifdef CONFIG_CIFS_UPCALL +static void +sess_auth_kerberos(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + struct key *spnego_key = NULL; + struct cifs_spnego_msg *msg; + u16 blob_len; + + /* extended security */ + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + spnego_key = cifs_get_spnego_key(ses); + if (IS_ERR(spnego_key)) { + rc = PTR_ERR(spnego_key); + spnego_key = NULL; + goto out; + } + + msg = spnego_key->payload.data; + /* + * check version field to make sure that cifs.upcall is + * sending us a response in an expected form + */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cifs_dbg(VFS, + "incorrect version of cifs.upcall (expected %d but got %d)", + CIFS_SPNEGO_UPCALL_VERSION, msg->version); + rc = -EKEYREJECTED; + goto out_put_spnego_key; + } + + ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); + rc = -ENOMEM; + goto out_put_spnego_key; + } + ses->auth_key.len = msg->sesskey_len; + + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities = cpu_to_le32(capabilities); + sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; + sess_data->iov[1].iov_len = msg->secblob_len; + pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len); + + if (ses->capabilities & CAP_UNICODE) { + /* unicode strings must be word aligned */ + if ((sess_data->iov[0].iov_len + + sess_data->iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); + unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp); + } else { + /* BB: is this right? */ + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out_put_spnego_key; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + if (smb_buf->WordCount != 4) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out_put_spnego_key; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out_put_spnego_key; + } + bcc_ptr += blob_len; + bytes_remaining -= blob_len; + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out_put_spnego_key: + key_invalidate(spnego_key); + key_put(spnego_key); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + +#else + +static void +sess_auth_kerberos(struct sess_data *sess_data) +{ + cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); + sess_data->result = -ENOSYS; + sess_data->func = NULL; +} +#endif /* ! CONFIG_CIFS_UPCALL */ int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, @@ -980,7 +1129,6 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, struct kvec iov[3]; enum securityEnum type; __u16 action, bytes_remaining; - struct key *spnego_key = NULL; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ u16 blob_len; char *ntlmsspblob = NULL; @@ -1017,6 +1165,9 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, case NTLMv2: sess_auth_ntlmv2(sess_data); goto out; + case Kerberos: + sess_auth_kerberos(sess_data); + goto out; default: cifs_dbg(FYI, "Continuing with CIFS_SessSetup\n"); } @@ -1072,63 +1223,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_base = NULL; iov[1].iov_len = 0; - if (type == Kerberos) { -#ifdef CONFIG_CIFS_UPCALL - struct cifs_spnego_msg *msg; - - spnego_key = cifs_get_spnego_key(ses); - if (IS_ERR(spnego_key)) { - rc = PTR_ERR(spnego_key); - spnego_key = NULL; - goto ssetup_exit; - } - - msg = spnego_key->payload.data; - /* check version field to make sure that cifs.upcall is - sending us a response in an expected form */ - if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { - cifs_dbg(VFS, "incorrect version of cifs.upcall " - "expected %d but got %d)", - CIFS_SPNEGO_UPCALL_VERSION, msg->version); - rc = -EKEYREJECTED; - goto ssetup_exit; - } - - ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, - GFP_KERNEL); - if (!ses->auth_key.response) { - cifs_dbg(VFS, - "Kerberos can't allocate (%u bytes) memory", - msg->sesskey_len); - rc = -ENOMEM; - goto ssetup_exit; - } - ses->auth_key.len = msg->sesskey_len; - - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - capabilities |= CAP_EXTENDED_SECURITY; - pSMB->req.Capabilities = cpu_to_le32(capabilities); - iov[1].iov_base = msg->data + msg->sesskey_len; - iov[1].iov_len = msg->secblob_len; - pSMB->req.SecurityBlobLength = cpu_to_le16(iov[1].iov_len); - - if (ses->capabilities & CAP_UNICODE) { - /* unicode strings must be word aligned */ - if ((iov[0].iov_len + iov[1].iov_len) % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_oslm_strings(&bcc_ptr, nls_cp); - unicode_domain_string(&bcc_ptr, ses, nls_cp); - } else - /* BB: is this right? */ - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); -#else /* ! CONFIG_CIFS_UPCALL */ - cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); - rc = -ENOSYS; - goto ssetup_exit; -#endif /* CONFIG_CIFS_UPCALL */ - } else if (type == RawNTLMSSP) { + if (type == RawNTLMSSP) { if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); rc = -ENOSYS; @@ -1268,10 +1363,6 @@ ssetup_ntlmssp_authenticate: } ssetup_exit: - if (spnego_key) { - key_invalidate(spnego_key); - key_put(spnego_key); - } kfree(str_area); kfree(ntlmsspblob); ntlmsspblob = NULL; From cc87c47d9d7ac25554aa81cd8ded56e75f79c198 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 16 Jun 2014 15:35:28 +0100 Subject: [PATCH 05/35] cifs: Separate rawntlmssp auth from CIFS_SessSetup() Separate rawntlmssp authentication from CIFS_SessSetup(). Also cleanup CIFS_SessSetup() since we no longer do any auth within it. Signed-off-by: Sachin Prabhu Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/sess.c | 566 ++++++++++++++++++++++++++----------------------- 1 file changed, 301 insertions(+), 265 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b413fae993aa..c2e7c07c9d5c 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1113,39 +1113,262 @@ sess_auth_kerberos(struct sess_data *sess_data) } #endif /* ! CONFIG_CIFS_UPCALL */ -int -CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *nls_cp) +/* + * The required kvec buffers have to be allocated before calling this + * function. + */ +static int +_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data) { - int rc = 0; - int wct; struct smb_hdr *smb_buf; - char *bcc_ptr; - char *str_area; SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; __u32 capabilities; - __u16 count; - int resp_buf_type; - struct kvec iov[3]; - enum securityEnum type; - __u16 action, bytes_remaining; - __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ - u16 blob_len; - char *ntlmsspblob = NULL; - struct sess_data *sess_data; + char *bcc_ptr; - if (ses == NULL) { - WARN(1, "%s: ses == NULL!", __func__); - return -EINVAL; + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)pSMB; + + capabilities = cifs_ssetup_hdr(ses, pSMB); + if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { + cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); + return -ENOSYS; } - sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL); - if (!sess_data) - return -ENOMEM; - sess_data->xid = xid; - sess_data->ses = ses; - sess_data->buf0_type = CIFS_NO_BUFFER; - sess_data->nls_cp = (struct nls_table *) nls_cp; + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities |= cpu_to_le32(capabilities); + + bcc_ptr = sess_data->iov[2].iov_base; + /* unicode strings must be word aligned */ + if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + return 0; +} + +static void +sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data); + +static void +sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; + __u16 bytes_remaining; + char *bcc_ptr; + u16 blob_len; + + cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n"); + + /* + * if memory allocation is successful, caller of this function + * frees it. + */ + ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); + if (!ses->ntlmssp) { + rc = -ENOMEM; + goto out; + } + ses->ntlmssp->sesskey_per_smbsess = false; + + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + + /* Build security blob before we assemble the request */ + build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses); + sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); + sess_data->iov[1].iov_base = pSMB->req.SecurityBlob; + pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); + + rc = _sess_auth_rawntlmssp_assemble_req(sess_data); + if (rc) + goto out; + + rc = sess_sendreceive(sess_data); + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + /* If true, rc here is expected and not an error */ + if (sess_data->buf0_type != CIFS_NO_BUFFER && + smb_buf->Status.CifsError == + cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)) + rc = 0; + + if (rc) + goto out; + + cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); + + if (smb_buf->WordCount != 4) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out; + } + + rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); +out: + sess_free_buffer(sess_data); + + if (!rc) { + sess_data->func = sess_auth_rawntlmssp_authenticate; + return; + } + + /* Else error. Cleanup */ + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + kfree(ses->ntlmssp); + ses->ntlmssp = NULL; + + sess_data->func = NULL; + sess_data->result = rc; +} + +static void +sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; + __u16 bytes_remaining; + char *bcc_ptr; + char *ntlmsspblob = NULL; + u16 blob_len; + + cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); + + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); + if (rc) + goto out; + + /* Build security blob before we assemble the request */ + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)pSMB; + /* + * 5 is an empirical value, large enough to hold + * authenticate message plus max 10 of av paris, + * domain, user, workstation names, flags, etc. + */ + ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), + GFP_KERNEL); + if (!ntlmsspblob) { + rc = -ENOMEM; + goto out; + } + + rc = build_ntlmssp_auth_blob(ntlmsspblob, + &blob_len, ses, sess_data->nls_cp); + if (rc) + goto out_free_ntlmsspblob; + sess_data->iov[1].iov_len = blob_len; + sess_data->iov[1].iov_base = ntlmsspblob; + pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); + /* + * Make sure that we tell the server that we are using + * the uid that it just gave us back on the response + * (challenge) + */ + smb_buf->Uid = ses->Suid; + + rc = _sess_auth_rawntlmssp_assemble_req(sess_data); + if (rc) + goto out_free_ntlmsspblob; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out_free_ntlmsspblob; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + if (smb_buf->WordCount != 4) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out_free_ntlmsspblob; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out_free_ntlmsspblob; + } + bcc_ptr += blob_len; + bytes_remaining -= blob_len; + + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + +out_free_ntlmsspblob: + kfree(ntlmsspblob); +out: + sess_free_buffer(sess_data); + + if (!rc) + rc = sess_establish_session(sess_data); + + /* Cleanup */ + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + kfree(ses->ntlmssp); + ses->ntlmssp = NULL; + + sess_data->func = NULL; + sess_data->result = rc; +} + +int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) +{ + int type; type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); @@ -1157,261 +1380,74 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, switch (type) { case LANMAN: - sess_auth_lanman(sess_data); - goto out; + /* LANMAN and plaintext are less secure and off by default. + * So we make this explicitly be turned on in kconfig (in the + * build) and turned on at runtime (changed from the default) + * in proc/fs/cifs or via mount parm. Unfortunately this is + * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */ +#ifdef CONFIG_CIFS_WEAK_PW_HASH + sess_data->func = sess_auth_lanman; + break; +#else + return -EOPNOTSUPP; +#endif case NTLM: - sess_auth_ntlm(sess_data); - goto out; + sess_data->func = sess_auth_ntlm; + break; case NTLMv2: - sess_auth_ntlmv2(sess_data); - goto out; + sess_data->func = sess_auth_ntlmv2; + break; case Kerberos: - sess_auth_kerberos(sess_data); - goto out; +#ifdef CONFIG_CIFS_UPCALL + sess_data->func = sess_auth_kerberos; + break; +#else + cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); + return -ENOSYS; + break; +#endif /* CONFIG_CIFS_UPCALL */ + case RawNTLMSSP: + sess_data->func = sess_auth_rawntlmssp_negotiate; + break; default: - cifs_dbg(FYI, "Continuing with CIFS_SessSetup\n"); - } - - if (type == RawNTLMSSP) { - /* if memory allocation is successful, caller of this function - * frees it. - */ - ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); - if (!ses->ntlmssp) - return -ENOMEM; - ses->ntlmssp->sesskey_per_smbsess = false; - - } - -ssetup_ntlmssp_authenticate: - if (phase == NtLmChallenge) - phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ - - /* same size: negotiate or auth, NTLMSSP or extended security */ - wct = 12; - - rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, - (void **)&smb_buf); - if (rc) - return rc; - - pSMB = (SESSION_SETUP_ANDX *)smb_buf; - - capabilities = cifs_ssetup_hdr(ses, pSMB); - - /* we will send the SMB in three pieces: - a fixed length beginning part, an optional - SPNEGO blob (which can be zero length), and a - last part which will include the strings - and rest of bcc area. This allows us to avoid - a large buffer 17K allocation */ - iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; - - /* setting this here allows the code at the end of the function - to free the request buffer if there's an error */ - resp_buf_type = CIFS_SMALL_BUFFER; - - /* 2000 big enough to fit max user, domain, NOS name etc. */ - str_area = kmalloc(2000, GFP_KERNEL); - if (str_area == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - bcc_ptr = str_area; - - iov[1].iov_base = NULL; - iov[1].iov_len = 0; - - if (type == RawNTLMSSP) { - if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { - cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); - rc = -ENOSYS; - goto ssetup_exit; - } - - cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase); - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - capabilities |= CAP_EXTENDED_SECURITY; - pSMB->req.Capabilities |= cpu_to_le32(capabilities); - switch(phase) { - case NtLmNegotiate: - build_ntlmssp_negotiate_blob( - pSMB->req.SecurityBlob, ses); - iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); - iov[1].iov_base = pSMB->req.SecurityBlob; - pSMB->req.SecurityBlobLength = - cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); - break; - case NtLmAuthenticate: - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc( - 5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto ssetup_exit; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, - &blob_len, ses, nls_cp); - if (rc) - goto ssetup_exit; - iov[1].iov_len = blob_len; - iov[1].iov_base = ntlmsspblob; - pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); - /* - * Make sure that we tell the server that we are using - * the uid that it just gave us back on the response - * (challenge) - */ - smb_buf->Uid = ses->Suid; - break; - default: - cifs_dbg(VFS, "invalid phase %d\n", phase); - rc = -ENOSYS; - goto ssetup_exit; - } - /* unicode strings must be word aligned */ - if ((iov[0].iov_len + iov[1].iov_len) % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_oslm_strings(&bcc_ptr, nls_cp); - } else { cifs_dbg(VFS, "secType %d not supported!\n", type); - rc = -ENOSYS; - goto ssetup_exit; + return -ENOSYS; } - iov[2].iov_base = str_area; - iov[2].iov_len = (long) bcc_ptr - (long) str_area; + return 0; +} - count = iov[1].iov_len + iov[2].iov_len; - smb_buf->smb_buf_length = - cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count); +int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_cp) +{ + int rc = 0; + struct sess_data *sess_data; - put_bcc(count, smb_buf); - - rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, - CIFS_LOG_ERROR); - /* SMB request buf freed in SendReceive2 */ - - pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; - smb_buf = (struct smb_hdr *)iov[0].iov_base; - - if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && - (smb_buf->Status.CifsError == - cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { - if (phase != NtLmNegotiate) { - cifs_dbg(VFS, "Unexpected more processing error\n"); - goto ssetup_exit; - } - /* NTLMSSP Negotiate sent now processing challenge (response) */ - phase = NtLmChallenge; /* process ntlmssp challenge */ - rc = 0; /* MORE_PROC rc is not an error here, but expected */ + if (ses == NULL) { + WARN(1, "%s: ses == NULL!", __func__); + return -EINVAL; } + + sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL); + if (!sess_data) + return -ENOMEM; + + rc = select_sec(ses, sess_data); if (rc) - goto ssetup_exit; + goto out; - if (smb_buf->WordCount != 4) { - rc = -EIO; - cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); - goto ssetup_exit; - } - action = le16_to_cpu(pSMB->resp.Action); - if (action & GUEST_LOGIN) - cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ - ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ - cifs_dbg(FYI, "UID = %llu\n", ses->Suid); - /* response can have either 3 or 4 word count - Samba sends 3 */ - /* and lanman response is 3 */ - bytes_remaining = get_bcc(smb_buf); - bcc_ptr = pByteArea(smb_buf); + sess_data->xid = xid; + sess_data->ses = ses; + sess_data->buf0_type = CIFS_NO_BUFFER; + sess_data->nls_cp = (struct nls_table *) nls_cp; - blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); - if (blob_len > bytes_remaining) { - cifs_dbg(VFS, "bad security blob length %d\n", - blob_len); - rc = -EINVAL; - goto ssetup_exit; - } - if (phase == NtLmChallenge) { - rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); - if (rc) - goto ssetup_exit; - } - bcc_ptr += blob_len; - bytes_remaining -= blob_len; + while (sess_data->func) + sess_data->func(sess_data); - /* BB check if Unicode and decode strings */ - if (bytes_remaining == 0) { - /* no string area to decode, do nothing */ - } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { - /* unicode string area must be word-aligned */ - if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { - ++bcc_ptr; - --bytes_remaining; - } - decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); - } else { - decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); - } - -ssetup_exit: - kfree(str_area); - kfree(ntlmsspblob); - ntlmsspblob = NULL; - if (resp_buf_type == CIFS_SMALL_BUFFER) { - cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base); - cifs_small_buf_release(iov[0].iov_base); - } else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); - - /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ - if ((phase == NtLmChallenge) && (rc == 0)) - goto ssetup_ntlmssp_authenticate; - - if (!rc) { - mutex_lock(&ses->server->srv_mutex); - if (!ses->server->session_estab) { - if (ses->server->sign) { - ses->server->session_key.response = - kmemdup(ses->auth_key.response, - ses->auth_key.len, GFP_KERNEL); - if (!ses->server->session_key.response) { - rc = -ENOMEM; - mutex_unlock(&ses->server->srv_mutex); - goto keycp_exit; - } - ses->server->session_key.len = - ses->auth_key.len; - } - ses->server->sequence_number = 0x2; - ses->server->session_estab = true; - } - mutex_unlock(&ses->server->srv_mutex); - - cifs_dbg(FYI, "CIFS session established successfully\n"); - spin_lock(&GlobalMid_Lock); - ses->status = CifsGood; - ses->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); - } - -keycp_exit: - kfree(ses->auth_key.response); - ses->auth_key.response = NULL; - kfree(ses->ntlmssp); - - return rc; + /* Store result before we free sess_data */ + rc = sess_data->result; out: - rc = sess_data->result; kfree(sess_data); return rc; } From 038bc961c31b070269ecd07349a7ee2e839d4fec Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 27 Jun 2014 10:33:11 +0400 Subject: [PATCH 06/35] CIFS: Fix async reading on reconnects If we get into read_into_pages() from cifs_readv_receive() and then loose a network, we issue cifs_reconnect that moves all mids to a private list and issue their callbacks. The callback of the async read request sets a mid to retry, frees it and wakes up a process that waits on the rdata completion. After the connection is established we return from read_into_pages() with a short read, use the mid that was freed before and try to read the remaining data from the a newly created socket. Both actions are not what we want to do. In reconnect cases (-EAGAIN) we should not mask off the error with a short read but should return the error code instead. Acked-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e90a1e9aa627..6b6df30cfd89 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2823,7 +2823,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? total_read : result; } ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) @@ -3231,7 +3231,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? total_read : result; } static int cifs_readpages(struct file *file, struct address_space *mapping, From 7e48ff82026d99fe498a280faa55e5842288d72f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jun 2014 15:01:03 +0400 Subject: [PATCH 07/35] CIFS: Separate page processing from writepages Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 152 ++++++++++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 70 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6b6df30cfd89..69d176396d04 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1878,6 +1878,86 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } +static unsigned int +wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, + struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done) +{ + unsigned int nr_pages = 0, i; + struct page *page; + + for (i = 0; i < found_pages; i++) { + page = wdata->pages[i]; + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + + if (nr_pages == 0) + lock_page(page); + else if (!trylock_page(page)) + break; + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + break; + } + + if (!wbc->range_cyclic && page->index > end) { + *done = true; + unlock_page(page); + break; + } + + if (*next && (page->index != *next)) { + /* Not next consecutive page */ + unlock_page(page); + break; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + break; + } + + /* + * This actually clears the dirty bit in the radix tree. + * See cifs_writepage() for more commentary. + */ + set_page_writeback(page); + if (page_offset(page) >= i_size_read(mapping->host)) { + *done = true; + unlock_page(page); + end_page_writeback(page); + break; + } + + wdata->pages[i] = page; + *next = page->index + 1; + ++nr_pages; + } + + /* reset index to refind any pages skipped */ + if (nr_pages == 0) + *index = wdata->pages[0]->index + 1; + + /* put any pages we aren't going to use */ + for (i = nr_pages; i < found_pages; i++) { + page_cache_release(wdata->pages[i]); + wdata->pages[i] = NULL; + } + + return nr_pages; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1886,7 +1966,6 @@ static int cifs_writepages(struct address_space *mapping, pgoff_t end, index; struct cifs_writedata *wdata; struct TCP_Server_Info *server; - struct page *page; int rc = 0; /* @@ -1944,75 +2023,8 @@ retry: break; } - nr_pages = 0; - for (i = 0; i < found_pages; i++) { - page = wdata->pages[i]; - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - - if (nr_pages == 0) - lock_page(page); - else if (!trylock_page(page)) - break; - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - break; - } - - if (!wbc->range_cyclic && page->index > end) { - done = true; - unlock_page(page); - break; - } - - if (next && (page->index != next)) { - /* Not next consecutive page */ - unlock_page(page); - break; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - break; - } - - /* - * This actually clears the dirty bit in the radix tree. - * See cifs_writepage() for more commentary. - */ - set_page_writeback(page); - - if (page_offset(page) >= i_size_read(mapping->host)) { - done = true; - unlock_page(page); - end_page_writeback(page); - break; - } - - wdata->pages[i] = page; - next = page->index + 1; - ++nr_pages; - } - - /* reset index to refind any pages skipped */ - if (nr_pages == 0) - index = wdata->pages[0]->index + 1; - - /* put any pages we aren't going to use */ - for (i = nr_pages; i < found_pages; i++) { - page_cache_release(wdata->pages[i]); - wdata->pages[i] = NULL; - } + nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc, + end, &index, &next, &done); /* nothing to write? */ if (nr_pages == 0) { From 27924075b548f5c7cf4268f77bda6a7471ffdfac Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Jul 2014 07:47:01 -0500 Subject: [PATCH 08/35] Remove sparse build warning The recent session setup patch set (cifs-Separate-rawntlmssp-auth-from-CIFS_SessSetup.patch) had introduced a trivial sparse build warning. Signed-off-by: Steve French Cc: Sachin Prabhu --- fs/cifs/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index c2e7c07c9d5c..39ee32688eac 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1366,7 +1366,7 @@ out: sess_data->result = rc; } -int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) +static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) { int type; From 619aa48edbab47367fa8a65e568f63fd64d6b4af Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jun 2014 15:28:37 +0400 Subject: [PATCH 09/35] CIFS: Separate page sending from writepages Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 68 ++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 69d176396d04..e76581ffcf77 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1958,6 +1958,43 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, return nr_pages; } +static int +wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, + struct address_space *mapping, struct writeback_control *wbc) +{ + int rc = 0; + struct TCP_Server_Info *server; + unsigned int i; + + wdata->sync_mode = wbc->sync_mode; + wdata->nr_pages = nr_pages; + wdata->offset = page_offset(wdata->pages[0]); + wdata->pagesz = PAGE_CACHE_SIZE; + wdata->tailsz = min(i_size_read(mapping->host) - + page_offset(wdata->pages[nr_pages - 1]), + (loff_t)PAGE_CACHE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; + + do { + if (wdata->cfile != NULL) + cifsFileInfo_put(wdata->cfile); + wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); + if (!wdata->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + break; + } + wdata->pid = wdata->cfile->pid; + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata, cifs_writedata_release); + } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); + + for (i = 0; i < nr_pages; ++i) + unlock_page(wdata->pages[i]); + + return rc; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1965,7 +2002,6 @@ static int cifs_writepages(struct address_space *mapping, bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; - struct TCP_Server_Info *server; int rc = 0; /* @@ -2032,35 +2068,7 @@ retry: continue; } - wdata->sync_mode = wbc->sync_mode; - wdata->nr_pages = nr_pages; - wdata->offset = page_offset(wdata->pages[0]); - wdata->pagesz = PAGE_CACHE_SIZE; - wdata->tailsz = - min(i_size_read(mapping->host) - - page_offset(wdata->pages[nr_pages - 1]), - (loff_t)PAGE_CACHE_SIZE); - wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + - wdata->tailsz; - - do { - if (wdata->cfile != NULL) - cifsFileInfo_put(wdata->cfile); - wdata->cfile = find_writable_file(CIFS_I(mapping->host), - false); - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handles for inode\n"); - rc = -EBADF; - break; - } - wdata->pid = wdata->cfile->pid; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata, - cifs_writedata_release); - } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); - - for (i = 0; i < nr_pages; ++i) - unlock_page(wdata->pages[i]); + rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); /* send failure -- clean up the mess */ if (rc != 0) { From 90ac1387c2dfcd9b4bd302fce03b9ddff73d0093 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jun 2014 16:11:00 +0400 Subject: [PATCH 10/35] CIFS: Separate pages initialization from writepages Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 56 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e76581ffcf77..0064d38fdb76 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1878,6 +1878,40 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } +static struct cifs_writedata * +wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, + pgoff_t end, pgoff_t *index, + unsigned int *found_pages) +{ + unsigned int nr_pages; + struct page **pages; + struct cifs_writedata *wdata; + + wdata = cifs_writedata_alloc((unsigned int)tofind, + cifs_writev_complete); + if (!wdata) + return NULL; + + /* + * find_get_pages_tag seems to return a max of 256 on each + * iteration, so we must call it several times in order to + * fill the array or the wsize is effectively limited to + * 256 * PAGE_CACHE_SIZE. + */ + *found_pages = 0; + pages = wdata->pages; + do { + nr_pages = find_get_pages_tag(mapping, index, + PAGECACHE_TAG_DIRTY, tofind, + pages); + *found_pages += nr_pages; + tofind -= nr_pages; + pages += nr_pages; + } while (nr_pages && tofind && *index <= end); + + return wdata; +} + static unsigned int wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, struct address_space *mapping, @@ -2025,35 +2059,17 @@ retry: while (!done && index <= end) { unsigned int i, nr_pages, found_pages; pgoff_t next = 0, tofind; - struct page **pages; tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; - wdata = cifs_writedata_alloc((unsigned int)tofind, - cifs_writev_complete); + wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, + &found_pages); if (!wdata) { rc = -ENOMEM; break; } - /* - * find_get_pages_tag seems to return a max of 256 on each - * iteration, so we must call it several times in order to - * fill the array or the wsize is effectively limited to - * 256 * PAGE_CACHE_SIZE. - */ - found_pages = 0; - pages = wdata->pages; - do { - nr_pages = find_get_pages_tag(mapping, &index, - PAGECACHE_TAG_DIRTY, - tofind, pages); - found_pages += nr_pages; - tofind -= nr_pages; - pages += nr_pages; - } while (nr_pages && tofind && index <= end); - if (found_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); break; From 66231a47965c551d3056d5104f8b06688065748c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 19 Jun 2014 16:15:16 +0400 Subject: [PATCH 11/35] CIFS: Fix wsize usage in writepages If a server change maximum buffer size for write (wsize) requests on reconnect we can fail on repeating with a big size buffer on -EAGAIN error in writepages. Fix this by checking wsize all the time before repeating request in writepages. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0064d38fdb76..ff4a9c3fe421 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2009,19 +2009,17 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, (loff_t)PAGE_CACHE_SIZE); wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; - do { - if (wdata->cfile != NULL) - cifsFileInfo_put(wdata->cfile); - wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handles for inode\n"); - rc = -EBADF; - break; - } + if (wdata->cfile != NULL) + cifsFileInfo_put(wdata->cfile); + wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); + if (!wdata->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + } else { wdata->pid = wdata->cfile->pid; server = tlink_tcon(wdata->cfile->tlink)->ses->server; rc = server->ops->async_writev(wdata, cifs_writedata_release); - } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); + } for (i = 0; i < nr_pages; ++i) unlock_page(wdata->pages[i]); @@ -2058,7 +2056,7 @@ static int cifs_writepages(struct address_space *mapping, retry: while (!done && index <= end) { unsigned int i, nr_pages, found_pages; - pgoff_t next = 0, tofind; + pgoff_t next = 0, tofind, saved_index = index; tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; @@ -2102,6 +2100,11 @@ retry: } kref_put(&wdata->refcount, cifs_writedata_release); + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) { + index = saved_index; + continue; + } + wbc->nr_to_write -= nr_pages; if (wbc->nr_to_write <= 0) done = true; From 7f6c50086a6f5bc0fee46548afc836070a439313 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 22 Jun 2014 11:03:22 +0400 Subject: [PATCH 12/35] CIFS: Fix cifs_writev_requeue when wsize changes If wsize changes on reconnect we need to use new writedata structure that for retrying. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/cifssmb.c | 84 +++++++++++++++++++++++++++++++++++++--------- fs/cifs/smb1ops.c | 7 ++++ fs/cifs/smb2ops.c | 10 ++++++ 4 files changed, 87 insertions(+), 16 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de6aed8c78e5..8c53f2093152 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -404,6 +404,8 @@ struct smb_version_operations { const struct cifs_fid *, u32 *); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); + /* writepages retry size */ + unsigned int (*wp_retry_size)(struct inode *); }; struct smb_version_values { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b7e5b6508caa..52cfd8cf0db0 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1896,28 +1896,80 @@ cifs_writedata_release(struct kref *refcount) static void cifs_writev_requeue(struct cifs_writedata *wdata) { - int i, rc; + int i, rc = 0; struct inode *inode = wdata->cfile->dentry->d_inode; struct TCP_Server_Info *server; + unsigned int rest_len; - for (i = 0; i < wdata->nr_pages; i++) { - lock_page(wdata->pages[i]); - clear_page_dirty_for_io(wdata->pages[i]); - } - + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + i = 0; + rest_len = wdata->bytes; do { - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata, cifs_writedata_release); - } while (rc == -EAGAIN); + struct cifs_writedata *wdata2; + unsigned int j, nr_pages, wsize, tailsz, cur_len; - for (i = 0; i < wdata->nr_pages; i++) { - unlock_page(wdata->pages[i]); - if (rc != 0) { - SetPageError(wdata->pages[i]); - end_page_writeback(wdata->pages[i]); - page_cache_release(wdata->pages[i]); + wsize = server->ops->wp_retry_size(inode); + if (wsize < rest_len) { + nr_pages = wsize / PAGE_CACHE_SIZE; + if (!nr_pages) { + rc = -ENOTSUPP; + break; + } + cur_len = nr_pages * PAGE_CACHE_SIZE; + tailsz = PAGE_CACHE_SIZE; + } else { + nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE); + cur_len = rest_len; + tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE; } - } + + wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete); + if (!wdata2) { + rc = -ENOMEM; + break; + } + + for (j = 0; j < nr_pages; j++) { + wdata2->pages[j] = wdata->pages[i + j]; + lock_page(wdata2->pages[j]); + clear_page_dirty_for_io(wdata2->pages[j]); + } + + wdata2->sync_mode = wdata->sync_mode; + wdata2->nr_pages = nr_pages; + wdata2->offset = page_offset(wdata2->pages[0]); + wdata2->pagesz = PAGE_CACHE_SIZE; + wdata2->tailsz = tailsz; + wdata2->bytes = cur_len; + + wdata2->cfile = find_writable_file(CIFS_I(inode), false); + if (!wdata2->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + break; + } + wdata2->pid = wdata2->cfile->pid; + rc = server->ops->async_writev(wdata2, cifs_writedata_release); + + for (j = 0; j < nr_pages; j++) { + unlock_page(wdata2->pages[j]); + if (rc != 0 && rc != -EAGAIN) { + SetPageError(wdata2->pages[j]); + end_page_writeback(wdata2->pages[j]); + page_cache_release(wdata2->pages[j]); + } + } + + if (rc) { + kref_put(&wdata2->refcount, cifs_writedata_release); + if (rc == -EAGAIN) + continue; + break; + } + + rest_len -= cur_len; + i += nr_pages; + } while (i < wdata->nr_pages); mapping_set_error(inode->i_mapping, rc); kref_put(&wdata->refcount, cifs_writedata_release); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d1fdfa848703..8a963426d810 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock) return oplock == OPLOCK_READ; } +static unsigned int +cifs_wp_retry_size(struct inode *inode) +{ + return CIFS_SB(inode->i_sb)->wsize; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1078,6 +1084,7 @@ struct smb_version_operations smb1_operations = { .query_mf_symlink = cifs_query_mf_symlink, .create_mf_symlink = cifs_create_mf_symlink, .is_read_op = cifs_is_read_op, + .wp_retry_size = cifs_wp_retry_size, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 787844bde384..e35ce5b3d88f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1104,6 +1104,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch) return le32_to_cpu(lc->lcontext.LeaseState); } +static unsigned int +smb2_wp_retry_size(struct inode *inode) +{ + return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize, + SMB2_MAX_BUFFER_SIZE); +} + struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -1177,6 +1184,7 @@ struct smb_version_operations smb20_operations = { .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_operations smb21_operations = { @@ -1252,6 +1260,7 @@ struct smb_version_operations smb21_operations = { .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_operations smb30_operations = { @@ -1330,6 +1339,7 @@ struct smb_version_operations smb30_operations = { .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, .validate_negotiate = smb3_validate_negotiate, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_values smb20_values = { From 66386c08be5d1a2eefc1f7ab8c008561b6c811e5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 20 Jun 2014 15:48:40 +0400 Subject: [PATCH 13/35] CIFS: Separate filling pages from iovec write Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 84 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff4a9c3fe421..f96f61c849fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2423,11 +2423,53 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) return rc; } +static int +wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, + size_t *len, unsigned long *num_pages) +{ + size_t save_len, copied, bytes, cur_len = *len; + unsigned long i, nr_pages = *num_pages; + + save_len = cur_len; + for (i = 0; i < nr_pages; i++) { + bytes = min_t(const size_t, cur_len, PAGE_SIZE); + copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from); + cur_len -= copied; + /* + * If we didn't copy as much as we expected, then that + * may mean we trod into an unmapped area. Stop copying + * at that point. On the next pass through the big + * loop, we'll likely end up getting a zero-length + * write and bailing out of it. + */ + if (copied < bytes) + break; + } + cur_len = save_len - cur_len; + *len = cur_len; + + /* + * If we have no data to send, then that probably means that + * the copy above failed altogether. That's most likely because + * the address in the iovec was bogus. Return -EFAULT and let + * the caller free anything we allocated and bail out. + */ + if (!cur_len) + return -EFAULT; + + /* + * i + 1 now represents the number of pages we actually used in + * the copy phase above. + */ + *num_pages = i + 1; + return 0; +} + static ssize_t cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) { - unsigned long nr_pages, i; - size_t bytes, copied, len, cur_len; + unsigned long nr_pages, num_pages, i; + size_t len, cur_len; ssize_t total_written = 0; loff_t offset; struct cifsFileInfo *open_file; @@ -2464,8 +2506,6 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) pid = current->tgid; do { - size_t save_len; - nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); wdata = cifs_writedata_alloc(nr_pages, cifs_uncached_writev_complete); @@ -2480,44 +2520,20 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) break; } - save_len = cur_len; - for (i = 0; i < nr_pages; i++) { - bytes = min_t(size_t, cur_len, PAGE_SIZE); - copied = copy_page_from_iter(wdata->pages[i], 0, bytes, - from); - cur_len -= copied; - /* - * If we didn't copy as much as we expected, then that - * may mean we trod into an unmapped area. Stop copying - * at that point. On the next pass through the big - * loop, we'll likely end up getting a zero-length - * write and bailing out of it. - */ - if (copied < bytes) - break; - } - cur_len = save_len - cur_len; - - /* - * If we have no data to send, then that probably means that - * the copy above failed altogether. That's most likely because - * the address in the iovec was bogus. Set the rc to -EFAULT, - * free anything we allocated and bail out. - */ - if (!cur_len) { + num_pages = nr_pages; + rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); + if (rc) { for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); kfree(wdata); - rc = -EFAULT; break; } /* - * i + 1 now represents the number of pages we actually used in - * the copy phase above. Bring nr_pages down to that, and free - * any pages that we didn't use. + * Bring nr_pages down to the number of pages we actually used, + * and free any pages that we didn't use. */ - for ( ; nr_pages > i + 1; nr_pages--) + for ( ; nr_pages > num_pages; nr_pages--) put_page(wdata->pages[nr_pages - 1]); wdata->sync_mode = WB_SYNC_ALL; From 43de94eadf0ceda54509335343bdc1349a2c5ab3 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 20 Jun 2014 16:10:52 +0400 Subject: [PATCH 14/35] CIFS: Separate writing from iovec write Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 76 +++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index f96f61c849fe..666069811ab8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2465,41 +2465,17 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, return 0; } -static ssize_t -cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +static int +cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, + struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *wdata_list) { + int rc = 0; + size_t cur_len; unsigned long nr_pages, num_pages, i; - size_t len, cur_len; - ssize_t total_written = 0; - loff_t offset; - struct cifsFileInfo *open_file; - struct cifs_tcon *tcon; - struct cifs_sb_info *cifs_sb; - struct cifs_writedata *wdata, *tmp; - struct list_head wdata_list; - int rc; + struct cifs_writedata *wdata; pid_t pid; - len = iov_iter_count(from); - rc = generic_write_checks(file, poffset, &len, 0); - if (rc) - return rc; - - if (!len) - return 0; - - iov_iter_truncate(from, len); - - INIT_LIST_HEAD(&wdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_writev) - return -ENOSYS; - - offset = *poffset; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else @@ -2551,11 +2527,47 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) break; } - list_add_tail(&wdata->list, &wdata_list); + list_add_tail(&wdata->list, wdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +static ssize_t +cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +{ + size_t len; + ssize_t total_written = 0; + struct cifsFileInfo *open_file; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct cifs_writedata *wdata, *tmp; + struct list_head wdata_list; + int rc; + + len = iov_iter_count(from); + rc = generic_write_checks(file, poffset, &len, 0); + if (rc) + return rc; + + if (!len) + return 0; + + iov_iter_truncate(from, len); + + INIT_LIST_HEAD(&wdata_list); + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + + rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, + &wdata_list); + /* * If at least one write was successfully sent, then discard any rc * value from the later writes. If the other write succeeds, then From 6ec0b01b2691d1465bb7219e031e8bf38ccd9397 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 20 Jun 2014 16:30:46 +0400 Subject: [PATCH 15/35] CIFS: Fix wsize usage in iovec write If a server change maximum buffer size for write (wsize) requests on reconnect we can fail on repeating with a big size buffer on -EAGAIN error in iovec write. Fix this by checking wsize all the time before repeating request in iovec write. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 4 --- fs/cifs/file.c | 63 +++++++++++++++++++++++++++++------------------ fs/cifs/smb2pdu.c | 4 --- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 52cfd8cf0db0..1b3f0aef01e3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -196,10 +196,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (rc) goto out; - /* - * FIXME: check if wsize needs updated due to negotiated smb buffer - * size shrinking - */ atomic_inc(&tconInfoReconnectCount); /* tell server Unix caps we support */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 666069811ab8..c9c4f5ac3c78 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2401,28 +2401,6 @@ cifs_uncached_writev_complete(struct work_struct *work) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } -/* attempt to send write to server, retry on any -EAGAIN errors */ -static int -cifs_uncached_retry_writev(struct cifs_writedata *wdata) -{ - int rc; - struct TCP_Server_Info *server; - - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - - do { - if (wdata->cfile->invalidHandle) { - rc = cifs_reopen_file(wdata->cfile, false); - if (rc != 0) - continue; - } - rc = server->ops->async_writev(wdata, - cifs_uncached_writedata_release); - } while (rc == -EAGAIN); - - return rc; -} - static int wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, size_t *len, unsigned long *num_pages) @@ -2474,13 +2452,19 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, size_t cur_len; unsigned long nr_pages, num_pages, i; struct cifs_writedata *wdata; + struct iov_iter saved_from; + loff_t saved_offset = offset; pid_t pid; + struct TCP_Server_Info *server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; + server = tlink_tcon(open_file->tlink)->ses->server; + memcpy(&saved_from, from, sizeof(struct iov_iter)); + do { nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); wdata = cifs_writedata_alloc(nr_pages, @@ -2520,10 +2504,20 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); - rc = cifs_uncached_retry_writev(wdata); + + if (!wdata->cfile->invalidHandle || + !cifs_reopen_file(wdata->cfile, false)) + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); if (rc) { kref_put(&wdata->refcount, cifs_uncached_writedata_release); + if (rc == -EAGAIN) { + memcpy(from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(from, offset - saved_offset); + continue; + } break; } @@ -2545,6 +2539,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) struct cifs_sb_info *cifs_sb; struct cifs_writedata *wdata, *tmp; struct list_head wdata_list; + struct iov_iter saved_from; int rc; len = iov_iter_count(from); @@ -2565,6 +2560,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) if (!tcon->ses->server->ops->async_writev) return -ENOSYS; + memcpy(&saved_from, from, sizeof(struct iov_iter)); + rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, &wdata_list); @@ -2596,7 +2593,25 @@ restart_loop: /* resend call if it's a retryable error */ if (rc == -EAGAIN) { - rc = cifs_uncached_retry_writev(wdata); + struct list_head tmp_list; + struct iov_iter tmp_from; + + INIT_LIST_HEAD(&tmp_list); + list_del_init(&wdata->list); + + memcpy(&tmp_from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(&tmp_from, + wdata->offset - *poffset); + + rc = cifs_write_from_iter(wdata->offset, + wdata->bytes, &tmp_from, + open_file, cifs_sb, &tmp_list); + + list_splice(&tmp_list, &wdata_list); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); goto restart_loop; } } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0158104df745..da7aa62bf061 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -245,10 +245,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (rc) goto out; atomic_inc(&tconInfoReconnectCount); - /* - * BB FIXME add code to check if wsize needs update due to negotiated - * smb buffer size shrinking. - */ out: /* * Check if handle based operation so we know whether we can continue From cb7e9eabb2b584884db0d11ae0376d31ac1cfdc1 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 5 Jun 2014 19:03:27 +0400 Subject: [PATCH 16/35] CIFS: Use multicredits for SMB 2.1/3 writes If we negotiate SMB 2.1 and higher version of the protocol and a server supports large write buffer size, we need to consume 1 credit per 65536 bytes. So, we need to know how many credits we have and obtain the required number of them before constructing a writedata structure in writepages and iovec write. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 15 +++++++++++ fs/cifs/cifsproto.h | 3 +++ fs/cifs/file.c | 36 ++++++++++++++++++++++----- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 55 +++++++++++++++++++++++++++++++++++++++-- fs/cifs/smb2pdu.c | 30 ++++++++++++++++++---- fs/cifs/smb2transport.c | 5 ++++ fs/cifs/transport.c | 25 +++++++++++++------ 8 files changed, 150 insertions(+), 20 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8c53f2093152..54ca2b985a0d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -406,6 +406,9 @@ struct smb_version_operations { int); /* writepages retry size */ unsigned int (*wp_retry_size)(struct inode *); + /* get mtu credits */ + int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, + unsigned int *, unsigned int *); }; struct smb_version_values { @@ -641,6 +644,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add, server->ops->add_credits(server, add, optype); } +static inline void +add_credits_and_wake_if(struct TCP_Server_Info *server, const unsigned int add, + const int optype) +{ + if (add) { + server->ops->add_credits(server, add, optype); + wake_up(&server->request_q); + } +} + static inline void set_credits(struct TCP_Server_Info *server, const int val) { @@ -1075,6 +1088,7 @@ struct cifs_writedata { int result; unsigned int pagesz; unsigned int tailsz; + unsigned int credits; unsigned int nr_pages; struct page *pages[]; }; @@ -1400,6 +1414,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define CIFS_OBREAK_OP 0x0100 /* oplock break request */ #define CIFS_NEG_OP 0x0200 /* negotiate request */ #define CIFS_OP_MASK 0x0380 /* mask request type */ +#define CIFS_HAS_CREDITS 0x0400 /* already has credits */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index de49d7a37b00..c31ce98c1704 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -90,6 +90,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, struct smb_rqst *); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); +extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, + unsigned int size, unsigned int *num, + unsigned int *credits); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int flags); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c9c4f5ac3c78..c79bdf3e6f51 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1670,8 +1670,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, break; } - len = min((size_t)cifs_sb->wsize, - write_size - total_written); + len = min(server->ops->wp_retry_size(dentry->d_inode), + (unsigned int)write_size - total_written); /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; @@ -2031,6 +2031,7 @@ static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); + struct TCP_Server_Info *server; bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; @@ -2053,23 +2054,30 @@ static int cifs_writepages(struct address_space *mapping, range_whole = true; scanned = true; } + server = cifs_sb_master_tcon(cifs_sb)->ses->server; retry: while (!done && index <= end) { - unsigned int i, nr_pages, found_pages; + unsigned int i, nr_pages, found_pages, wsize, credits; pgoff_t next = 0, tofind, saved_index = index; - tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, - end - index) + 1; + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; + + tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, &found_pages); if (!wdata) { rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } if (found_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); break; } @@ -2079,13 +2087,17 @@ retry: /* nothing to write? */ if (nr_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); continue; } + wdata->credits = credits; + rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); /* send failure -- clean up the mess */ if (rc != 0) { + add_credits_and_wake_if(server, wdata->credits, 0); for (i = 0; i < nr_pages; ++i) { if (rc == -EAGAIN) redirty_page_for_writepage(wbc, @@ -2466,17 +2478,26 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, memcpy(&saved_from, from, sizeof(struct iov_iter)); do { - nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); + unsigned int wsize, credits; + + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; + + nr_pages = get_numpages(wsize, len, &cur_len); wdata = cifs_writedata_alloc(nr_pages, cifs_uncached_writev_complete); if (!wdata) { rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } rc = cifs_write_allocate_pages(wdata->pages, nr_pages); if (rc) { kfree(wdata); + add_credits_and_wake_if(server, credits, 0); break; } @@ -2486,6 +2507,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); kfree(wdata); + add_credits_and_wake_if(server, credits, 0); break; } @@ -2504,12 +2526,14 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); + wdata->credits = credits; if (!wdata->cfile->invalidHandle || !cifs_reopen_file(wdata->cfile, false)) rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); if (rc) { + add_credits_and_wake_if(server, wdata->credits, 0); kref_put(&wdata->refcount, cifs_uncached_writedata_release); if (rc == -EAGAIN) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 8a963426d810..5e8c22d6c7b9 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1025,6 +1025,7 @@ struct smb_version_operations smb1_operations = { .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, .get_credits = cifs_get_credits, + .wait_mtu_credits = cifs_wait_mtu_credits, .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e35ce5b3d88f..3427c1fa3806 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -112,6 +112,53 @@ smb2_get_credits(struct mid_q_entry *mid) return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest); } +static int +smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, + unsigned int *num, unsigned int *credits) +{ + int rc = 0; + unsigned int scredits; + + spin_lock(&server->req_lock); + while (1) { + if (server->credits <= 0) { + spin_unlock(&server->req_lock); + cifs_num_waiters_inc(server); + rc = wait_event_killable(server->request_q, + has_credits(server, &server->credits)); + cifs_num_waiters_dec(server); + if (rc) + return rc; + spin_lock(&server->req_lock); + } else { + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->req_lock); + return -ENOENT; + } + + scredits = server->credits; + /* can deadlock with reopen */ + if (scredits == 1) { + *num = SMB2_MAX_BUFFER_SIZE; + *credits = 0; + break; + } + + /* leave one credit for a possible reopen */ + scredits--; + *num = min_t(unsigned int, size, + scredits * SMB2_MAX_BUFFER_SIZE); + + *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE); + server->credits -= *credits; + server->in_flight++; + break; + } + } + spin_unlock(&server->req_lock); + return rc; +} + static __u64 smb2_get_next_mid(struct TCP_Server_Info *server) { @@ -182,8 +229,9 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified wsize, or default */ wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); - /* set it to the maximum buffer size value we can send with 1 credit */ - wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); + + if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) + wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); return wsize; } @@ -1120,6 +1168,7 @@ struct smb_version_operations smb20_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = cifs_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, @@ -1196,6 +1245,7 @@ struct smb_version_operations smb21_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = smb2_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, @@ -1272,6 +1322,7 @@ struct smb_version_operations smb30_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = smb2_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index da7aa62bf061..be7b2eb55134 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -108,7 +108,6 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , if (!tcon) goto out; - /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */ /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ if ((tcon->ses) && @@ -1892,15 +1891,25 @@ int smb2_async_writev(struct cifs_writedata *wdata, void (*release)(struct kref *kref)) { - int rc = -EACCES; + int rc = -EACCES, flags = 0; struct smb2_write_req *req = NULL; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; struct kvec iov; struct smb_rqst rqst; rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); - if (rc) + if (rc) { + if (rc == -EAGAIN && wdata->credits) { + /* credits was reset by reconnect */ + wdata->credits = 0; + /* reduce in_flight value since we won't send the req */ + spin_lock(&server->req_lock); + server->in_flight--; + spin_unlock(&server->req_lock); + } goto async_writev_out; + } req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); @@ -1933,9 +1942,20 @@ smb2_async_writev(struct cifs_writedata *wdata, inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); + if (wdata->credits) { + req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, + SMB2_MAX_BUFFER_SIZE)); + spin_lock(&server->req_lock); + server->credits += wdata->credits - + le16_to_cpu(req->hdr.CreditCharge); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + flags = CIFS_HAS_CREDITS; + } + kref_get(&wdata->refcount); - rc = cifs_call_async(tcon->ses->server, &rqst, NULL, - smb2_writev_callback, wdata, 0); + rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata, + flags); if (rc) { kref_put(&wdata->refcount, release); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 59c748ce872f..5111e7272db6 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) static inline void smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) { + unsigned int i, num = le16_to_cpu(hdr->CreditCharge); + hdr->MessageId = get_next_mid64(server); + /* skip message numbers according to CreditCharge field */ + for (i = 1; i < num; i++) + get_next_mid(server); } static struct mid_q_entry * diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 18cd5650a5fc..9d087f4e7d4e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout, return wait_for_free_credits(server, timeout, val); } +int +cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, + unsigned int *num, unsigned int *credits) +{ + *num = size; + *credits = 0; + return 0; +} + static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { @@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, { int rc, timeout, optype; struct mid_q_entry *mid; + unsigned int credits = 0; timeout = flags & CIFS_TIMEOUT_MASK; optype = flags & CIFS_OP_MASK; - rc = wait_for_free_request(server, timeout, optype); - if (rc) - return rc; + if ((flags & CIFS_HAS_CREDITS) == 0) { + rc = wait_for_free_request(server, timeout, optype); + if (rc) + return rc; + credits = 1; + } mutex_lock(&server->srv_mutex); mid = server->ops->setup_async_request(server, rqst); if (IS_ERR(mid)) { mutex_unlock(&server->srv_mutex); - add_credits(server, 1, optype); - wake_up(&server->request_q); + add_credits_and_wake_if(server, credits, optype); return PTR_ERR(mid); } @@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, return 0; cifs_delete_mid(mid); - add_credits(server, 1, optype); - wake_up(&server->request_q); + add_credits_and_wake_if(server, credits, optype); return rc; } From 387eb92ac6892518fb67e423f65fcaca76e256a8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 24 Jun 2014 13:08:54 +0400 Subject: [PATCH 17/35] CIFS: Separate page search from readpages Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 107 ++++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c79bdf3e6f51..bec48f1bc3ca 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3340,6 +3340,63 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, return total_read > 0 && result != -EAGAIN ? total_read : result; } +static int +readpages_get_pages(struct address_space *mapping, struct list_head *page_list, + unsigned int rsize, struct list_head *tmplist, + unsigned int *nr_pages, loff_t *offset, unsigned int *bytes) +{ + struct page *page, *tpage; + unsigned int expected_index; + int rc; + + page = list_entry(page_list->prev, struct page, lru); + + /* + * Lock the page and put it in the cache. Since no one else + * should have access to this page, we're safe to simply set + * PG_locked without checking it first. + */ + __set_page_locked(page); + rc = add_to_page_cache_locked(page, mapping, + page->index, GFP_KERNEL); + + /* give up if we can't stick it in the cache */ + if (rc) { + __clear_page_locked(page); + return rc; + } + + /* move first page to the tmplist */ + *offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + *bytes = PAGE_CACHE_SIZE; + *nr_pages = 1; + list_move_tail(&page->lru, tmplist); + + /* now try and add more pages onto the request */ + expected_index = page->index + 1; + list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { + /* discontinuity ? */ + if (page->index != expected_index) + break; + + /* would this page push the read over the rsize? */ + if (*bytes + PAGE_CACHE_SIZE > rsize) + break; + + __set_page_locked(page); + if (add_to_page_cache_locked(page, mapping, page->index, + GFP_KERNEL)) { + __clear_page_locked(page); + break; + } + list_move_tail(&page->lru, tmplist); + (*bytes) += PAGE_CACHE_SIZE; + expected_index++; + (*nr_pages)++; + } + return rc; +} + static int cifs_readpages(struct file *file, struct address_space *mapping, struct list_head *page_list, unsigned num_pages) { @@ -3394,57 +3451,15 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the rdata->pages, then we want them in increasing order. */ while (!list_empty(page_list)) { - unsigned int i; - unsigned int bytes = PAGE_CACHE_SIZE; - unsigned int expected_index; - unsigned int nr_pages = 1; + unsigned int i, nr_pages, bytes; loff_t offset; struct page *page, *tpage; struct cifs_readdata *rdata; - page = list_entry(page_list->prev, struct page, lru); - - /* - * Lock the page and put it in the cache. Since no one else - * should have access to this page, we're safe to simply set - * PG_locked without checking it first. - */ - __set_page_locked(page); - rc = add_to_page_cache_locked(page, mapping, - page->index, GFP_KERNEL); - - /* give up if we can't stick it in the cache */ - if (rc) { - __clear_page_locked(page); + rc = readpages_get_pages(mapping, page_list, rsize, &tmplist, + &nr_pages, &offset, &bytes); + if (rc) break; - } - - /* move first page to the tmplist */ - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - list_move_tail(&page->lru, &tmplist); - - /* now try and add more pages onto the request */ - expected_index = page->index + 1; - list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { - /* discontinuity ? */ - if (page->index != expected_index) - break; - - /* would this page push the read over the rsize? */ - if (bytes + PAGE_CACHE_SIZE > rsize) - break; - - __set_page_locked(page); - if (add_to_page_cache_locked(page, mapping, - page->index, GFP_KERNEL)) { - __clear_page_locked(page); - break; - } - list_move_tail(&page->lru, &tmplist); - bytes += PAGE_CACHE_SIZE; - expected_index++; - nr_pages++; - } rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); if (!rdata) { From 69cebd75606f8b9162ad5d0104367370ceabeeba Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 24 Jun 2014 13:42:03 +0400 Subject: [PATCH 18/35] CIFS: Fix rsize usage in readpages If a server changes maximum buffer size for read (rsize) requests on reconnect we can fail on repeating with a big size buffer on -EAGAIN error in readpages. Fix this by checking rsize all the time before repeating requests. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bec48f1bc3ca..d6279185e2f0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3349,6 +3349,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, unsigned int expected_index; int rc; + INIT_LIST_HEAD(tmplist); + page = list_entry(page_list->prev, struct page, lru); /* @@ -3404,18 +3406,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, struct list_head tmplist; struct cifsFileInfo *open_file = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - unsigned int rsize = cifs_sb->rsize; + struct TCP_Server_Info *server; pid_t pid; - /* - * Give up immediately if rsize is too small to read an entire page. - * The VFS will fall back to readpage. We should never reach this - * point however since we set ra_pages to 0 when the rsize is smaller - * than a cache page. - */ - if (unlikely(rsize < PAGE_CACHE_SIZE)) - return 0; - /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative @@ -3434,7 +3427,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pid = current->tgid; rc = 0; - INIT_LIST_HEAD(&tmplist); + server = tlink_tcon(open_file->tlink)->ses->server; cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", __func__, file, mapping, num_pages); @@ -3456,8 +3449,17 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, struct page *page, *tpage; struct cifs_readdata *rdata; - rc = readpages_get_pages(mapping, page_list, rsize, &tmplist, - &nr_pages, &offset, &bytes); + /* + * Give up immediately if rsize is too small to read an entire + * page. The VFS will fall back to readpage. We should never + * reach this point however since we set ra_pages to 0 when the + * rsize is smaller than a cache page. + */ + if (unlikely(cifs_sb->rsize < PAGE_CACHE_SIZE)) + return 0; + + rc = readpages_get_pages(mapping, page_list, cifs_sb->rsize, + &tmplist, &nr_pages, &offset, &bytes); if (rc) break; @@ -3487,15 +3489,24 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->pages[rdata->nr_pages++] = page; } - rc = cifs_retry_async_readv(rdata); - if (rc != 0) { + if (!rdata->cfile->invalidHandle || + !cifs_reopen_file(rdata->cfile, true)) + rc = server->ops->async_readv(rdata); + if (rc) { for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; lru_cache_add_file(page); unlock_page(page); page_cache_release(page); + if (rc == -EAGAIN) + list_add_tail(&page->lru, &tmplist); } kref_put(&rdata->refcount, cifs_readdata_release); + if (rc == -EAGAIN) { + /* Re-add pages to the page_list and retry */ + list_splice(&tmplist, page_list); + continue; + } break; } From 0ada36b244e8316bb47b46b84b33c5a507bed7a4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 25 Jun 2014 10:42:28 +0400 Subject: [PATCH 19/35] CIFS: Separate page reading from user read Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 69 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d6279185e2f0..7df4e4658d74 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2932,43 +2932,23 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, return total_read > 0 && result != -EAGAIN ? total_read : result; } -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +static int +cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *rdata_list) { - struct file *file = iocb->ki_filp; - ssize_t rc; - size_t len, cur_len; - ssize_t total_read = 0; - loff_t offset = iocb->ki_pos; + struct cifs_readdata *rdata; unsigned int npages; - struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; - struct cifsFileInfo *open_file; - struct cifs_readdata *rdata, *tmp; - struct list_head rdata_list; + size_t cur_len; + int rc; pid_t pid; - len = iov_iter_count(to); - if (!len) - return 0; - - INIT_LIST_HEAD(&rdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_readv) - return -ENOSYS; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cifs_dbg(FYI, "attempting read on write only file instance\n"); - do { - cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); + cur_len = min_t(const size_t, len, cifs_sb->rsize); npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); /* allocate a readdata struct */ @@ -2999,11 +2979,44 @@ error: break; } - list_add_tail(&rdata->list, &rdata_list); + list_add_tail(&rdata->list, rdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + ssize_t rc; + size_t len; + ssize_t total_read = 0; + loff_t offset = iocb->ki_pos; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *open_file; + struct cifs_readdata *rdata, *tmp; + struct list_head rdata_list; + + len = iov_iter_count(to); + if (!len) + return 0; + + INIT_LIST_HEAD(&rdata_list); + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list); + /* if at least one read request send succeeded, then reset rc */ if (!list_empty(&rdata_list)) rc = 0; From 25f402598d2c8f0808d93715ad33e43b265c1604 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 25 Jun 2014 10:45:07 +0400 Subject: [PATCH 20/35] CIFS: Fix rsize usage in user read If a server changes maximum buffer size for read (rsize) requests on reconnect we can fail on repeating with a big size buffer on -EAGAIN error in user read. Fix this by checking rsize all the time before repeating requests. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7df4e4658d74..bbc38594b39a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2828,26 +2828,6 @@ cifs_uncached_readdata_release(struct kref *refcount) cifs_readdata_release(refcount); } -static int -cifs_retry_async_readv(struct cifs_readdata *rdata) -{ - int rc; - struct TCP_Server_Info *server; - - server = tlink_tcon(rdata->cfile->tlink)->ses->server; - - do { - if (rdata->cfile->invalidHandle) { - rc = cifs_reopen_file(rdata->cfile, true); - if (rc != 0) - continue; - } - rc = server->ops->async_readv(rdata); - } while (rc == -EAGAIN); - - return rc; -} - /** * cifs_readdata_to_iov - copy data from pages in response to an iovec * @rdata: the readdata response with list of pages holding data @@ -2941,6 +2921,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, size_t cur_len; int rc; pid_t pid; + struct TCP_Server_Info *server; + + server = tlink_tcon(open_file->tlink)->ses->server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -2971,11 +2954,15 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->pagesz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; - rc = cifs_retry_async_readv(rdata); + if (!rdata->cfile->invalidHandle || + !cifs_reopen_file(rdata->cfile, true)) + rc = server->ops->async_readv(rdata); error: if (rc) { kref_put(&rdata->refcount, cifs_uncached_readdata_release); + if (rc == -EAGAIN) + continue; break; } @@ -3023,8 +3010,8 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) len = iov_iter_count(to); /* the loop below should proceed in the order of increasing offsets */ +again: list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { - again: if (!rc) { /* FIXME: freezable sleep too? */ rc = wait_for_completion_killable(&rdata->done); @@ -3034,7 +3021,19 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) rc = rdata->result; /* resend call if it's a retryable error */ if (rc == -EAGAIN) { - rc = cifs_retry_async_readv(rdata); + struct list_head tmp_list; + + list_del_init(&rdata->list); + INIT_LIST_HEAD(&tmp_list); + + rc = cifs_send_async_read(rdata->offset, + rdata->bytes, rdata->cfile, + cifs_sb, &tmp_list); + + list_splice(&tmp_list, &rdata_list); + + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); goto again; } } else { From e374d90f8a7693f24635bca9e5d56f3775bb36e2 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 25 Jun 2014 16:19:02 +0400 Subject: [PATCH 21/35] CIFS: Fix rsize usage for sync read If a server changes maximum buffer size for read requests (rsize) on reconnect we can fail on repeating with a big size buffer on -EAGAIN error in cifs_read. Fix this by checking rsize all the time before repeating requests. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bbc38594b39a..00b2a254ff1c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3148,18 +3148,19 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) for (total_read = 0, cur_offset = read_data; read_size > total_read; total_read += bytes_read, cur_offset += bytes_read) { - current_read_size = min_t(uint, read_size - total_read, rsize); - /* - * For windows me and 9x we do not want to request more than it - * negotiated since it will refuse the read then. - */ - if ((tcon->ses) && !(tcon->ses->capabilities & + do { + current_read_size = min_t(uint, read_size - total_read, + rsize); + /* + * For windows me and 9x we do not want to request more + * than it negotiated since it will refuse the read + * then. + */ + if ((tcon->ses) && !(tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)) { - current_read_size = min_t(uint, current_read_size, - CIFSMaxBufSize); - } - rc = -EAGAIN; - while (rc == -EAGAIN) { + current_read_size = min_t(uint, + current_read_size, CIFSMaxBufSize); + } if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); if (rc != 0) @@ -3172,7 +3173,8 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) rc = server->ops->sync_read(xid, open_file, &io_parms, &bytes_read, &cur_offset, &buf_type); - } + } while (rc == -EAGAIN); + if (rc || (bytes_read == 0)) { if (total_read) { break; From bed9da0213f2174719b68012bd60735a11cfe244 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 25 Jun 2014 11:28:57 +0400 Subject: [PATCH 22/35] CIFS: Use multicredits for SMB 2.1/3 reads If we negotiate SMB 2.1 and higher version of the protocol and a server supports large read buffer size, we need to consume 1 credit per 65536 bytes. So, we need to know how many credits we have and obtain the required number of them before constructing a readdata structure in readpages and user read. Reviewed-by: Shirish Pargaonkar Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 35 ++++++++++++++++++++++++++++------- fs/cifs/smb2ops.c | 5 +++-- fs/cifs/smb2pdu.c | 30 +++++++++++++++++++++++++++--- 4 files changed, 59 insertions(+), 12 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 54ca2b985a0d..f33ff4c7b8a8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1068,6 +1068,7 @@ struct cifs_readdata { struct kvec iov; unsigned int pagesz; unsigned int tailsz; + unsigned int credits; unsigned int nr_pages; struct page *pages[]; }; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 00b2a254ff1c..ebdeb56f8d30 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2917,7 +2917,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *rdata_list) { struct cifs_readdata *rdata; - unsigned int npages; + unsigned int npages, rsize, credits; size_t cur_len; int rc; pid_t pid; @@ -2931,13 +2931,19 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, pid = current->tgid; do { - cur_len = min_t(const size_t, len, cifs_sb->rsize); + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + &rsize, &credits); + if (rc) + break; + + cur_len = min_t(const size_t, len, rsize); npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); /* allocate a readdata struct */ rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete); if (!rdata) { + add_credits_and_wake_if(server, credits, 0); rc = -ENOMEM; break; } @@ -2953,12 +2959,14 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->pid = pid; rdata->pagesz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; + rdata->credits = credits; if (!rdata->cfile->invalidHandle || !cifs_reopen_file(rdata->cfile, true)) rc = server->ops->async_readv(rdata); error: if (rc) { + add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, cifs_uncached_readdata_release); if (rc == -EAGAIN) @@ -3458,10 +3466,16 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the rdata->pages, then we want them in increasing order. */ while (!list_empty(page_list)) { - unsigned int i, nr_pages, bytes; + unsigned int i, nr_pages, bytes, rsize; loff_t offset; struct page *page, *tpage; struct cifs_readdata *rdata; + unsigned credits; + + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + &rsize, &credits); + if (rc) + break; /* * Give up immediately if rsize is too small to read an entire @@ -3469,13 +3483,17 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * reach this point however since we set ra_pages to 0 when the * rsize is smaller than a cache page. */ - if (unlikely(cifs_sb->rsize < PAGE_CACHE_SIZE)) + if (unlikely(rsize < PAGE_CACHE_SIZE)) { + add_credits_and_wake_if(server, credits, 0); return 0; + } - rc = readpages_get_pages(mapping, page_list, cifs_sb->rsize, - &tmplist, &nr_pages, &offset, &bytes); - if (rc) + rc = readpages_get_pages(mapping, page_list, rsize, &tmplist, + &nr_pages, &offset, &bytes); + if (rc) { + add_credits_and_wake_if(server, credits, 0); break; + } rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); if (!rdata) { @@ -3487,6 +3505,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, page_cache_release(page); } rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } @@ -3497,6 +3516,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->pid = pid; rdata->pagesz = PAGE_CACHE_SIZE; rdata->read_into_pages = cifs_readpages_read_into_pages; + rdata->credits = credits; list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); @@ -3507,6 +3527,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, !cifs_reopen_file(rdata->cfile, true)) rc = server->ops->async_readv(rdata); if (rc) { + add_credits_and_wake_if(server, rdata->credits, 0); for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; lru_cache_add_file(page); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3427c1fa3806..d0210a8e9829 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -245,8 +245,9 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified rsize, or default */ rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); - /* set it to the maximum buffer size value we can send with 1 credit */ - rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); + + if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) + rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); return rsize; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index be7b2eb55134..c31e5a060338 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1747,11 +1747,12 @@ smb2_readv_callback(struct mid_q_entry *mid) int smb2_async_readv(struct cifs_readdata *rdata) { - int rc; + int rc, flags = 0; struct smb2_hdr *buf; struct cifs_io_parms io_parms; struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; + struct TCP_Server_Info *server; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -1762,18 +1763,41 @@ smb2_async_readv(struct cifs_readdata *rdata) io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; io_parms.pid = rdata->pid; + + server = io_parms.tcon->ses->server; + rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); - if (rc) + if (rc) { + if (rc == -EAGAIN && rdata->credits) { + /* credits was reset by reconnect */ + rdata->credits = 0; + /* reduce in_flight value since we won't send the req */ + spin_lock(&server->req_lock); + server->in_flight--; + spin_unlock(&server->req_lock); + } return rc; + } buf = (struct smb2_hdr *)rdata->iov.iov_base; /* 4 for rfc1002 length field */ rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; + if (rdata->credits) { + buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, + SMB2_MAX_BUFFER_SIZE)); + spin_lock(&server->req_lock); + server->credits += rdata->credits - + le16_to_cpu(buf->CreditCharge); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + flags = CIFS_HAS_CREDITS; + } + kref_get(&rdata->refcount); rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, cifs_readv_receive, smb2_readv_callback, - rdata, 0); + rdata, flags); if (rc) { kref_put(&rdata->refcount, cifs_readdata_release); cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); From 3fabaa274635231c01f3bd0d10c4d353aa200673 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 09:55:52 +0400 Subject: [PATCH 23/35] CIFS: Indicate reconnect with ECONNABORTED error code that let us not mix it with EAGAIN. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/connect.c | 8 ++++---- fs/cifs/file.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 20d75b8ddb26..b0427f6ea971 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -557,7 +557,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, try_to_freeze(); if (server_unresponsive(server)) { - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } @@ -571,7 +571,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, break; } else if (server->tcpStatus == CifsNeedReconnect) { cifs_reconnect(server); - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } else if (length == -ERESTARTSYS || length == -EAGAIN || @@ -588,7 +588,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, cifs_dbg(FYI, "Received no data or error: expecting %d\n" "got %d", to_read, length); cifs_reconnect(server); - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } } @@ -786,7 +786,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server); wake_up(&server->response_q); - return -EAGAIN; + return -ECONNABORTED; } /* switch to large buffer if too big for a small one */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ebdeb56f8d30..9582ded2332e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2909,7 +2909,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 && result != -EAGAIN ? total_read : result; + return total_read > 0 && result != -ECONNABORTED ? total_read : result; } static int @@ -3359,7 +3359,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 && result != -EAGAIN ? total_read : result; + return total_read > 0 && result != -ECONNABORTED ? total_read : result; } static int From 34a54d617785e5ecafe1605df7aa689ec193964c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 10:03:29 +0400 Subject: [PATCH 24/35] CIFS: Use separate var for the number of bytes got in async read and don't mix it with the number of bytes that was requested. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/cifssmb.c | 6 +++--- fs/cifs/file.c | 2 +- fs/cifs/smb2pdu.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f33ff4c7b8a8..0012e1e291d4 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1059,6 +1059,7 @@ struct cifs_readdata { struct address_space *mapping; __u64 offset; unsigned int bytes; + unsigned int got_bytes; pid_t pid; int result; struct work_struct work; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1b3f0aef01e3..1781a03aed5e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1513,7 +1513,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; - rdata->bytes = length; + rdata->got_bytes = length; cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", server->total_read, buflen, data_len); @@ -1556,8 +1556,8 @@ cifs_readv_callback(struct mid_q_entry *mid) rc); } /* FIXME: should this be counted toward the initiating task? */ - task_io_account_read(rdata->bytes); - cifs_stats_bytes_read(tcon, rdata->bytes); + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9582ded2332e..3d5d48838519 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2840,7 +2840,7 @@ cifs_uncached_readdata_release(struct kref *refcount) static int cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) { - size_t remaining = rdata->bytes; + size_t remaining = rdata->got_bytes; unsigned int i; for (i = 0; i < rdata->nr_pages; i++) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c31e5a060338..c66ae4183bd3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1723,8 +1723,8 @@ smb2_readv_callback(struct mid_q_entry *mid) rc); } /* FIXME: should this be counted toward the initiating task? */ - task_io_account_read(rdata->bytes); - cifs_stats_bytes_read(tcon, rdata->bytes); + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: From b3160aebb49b5e07f6bc3b8c5bed6013ca9e422e Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 10:16:25 +0400 Subject: [PATCH 25/35] CIFS: Count got bytes in read_into_pages() that let us know how many bytes we have already got before reconnect. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 1 - fs/cifs/file.c | 16 ++++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1781a03aed5e..57d447c56d83 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1513,7 +1513,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; - rdata->got_bytes = length; cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", server->total_read, buflen, data_len); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3d5d48838519..3ea6fc86a471 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2868,11 +2868,12 @@ static int cifs_uncached_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; unsigned int nr_pages = rdata->nr_pages; struct kvec iov; + rdata->got_bytes = 0; rdata->tailsz = PAGE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -2906,10 +2907,11 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 && result != -ECONNABORTED ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? + rdata->got_bytes : result; } static int @@ -3290,7 +3292,7 @@ static int cifs_readpages_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; u64 eof; pgoff_t eof_index; @@ -3302,6 +3304,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); + rdata->got_bytes = 0; rdata->tailsz = PAGE_CACHE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -3356,10 +3359,11 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 && result != -ECONNABORTED ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? + rdata->got_bytes : result; } static int From 2e8a05d80213cdfbf3bf8e6eb3059831c7015e89 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 10:21:15 +0400 Subject: [PATCH 26/35] CIFS: Fix possible buffer corruption in cifs_user_read() If there was a short read in the middle of the rdata list, we can end up with a corrupt output buffer. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3ea6fc86a471..c3390e2c6e0d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3049,7 +3049,9 @@ again: } else { rc = cifs_readdata_to_iov(rdata, to); } - + /* if there was a short read -- discard anything left */ + if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) + rc = -ENODATA; } list_del_init(&rdata->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); From fb8a3e52559ad52829c6838d304f5b75c140b97a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 11:50:39 +0400 Subject: [PATCH 27/35] CIFS: Improve indentation in cifs_user_read() Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c3390e2c6e0d..e17012817d9d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3027,28 +3027,27 @@ again: rc = wait_for_completion_killable(&rdata->done); if (rc) rc = -EINTR; - else if (rdata->result) { - rc = rdata->result; + else if (rdata->result == -EAGAIN) { /* resend call if it's a retryable error */ - if (rc == -EAGAIN) { - struct list_head tmp_list; + struct list_head tmp_list; - list_del_init(&rdata->list); - INIT_LIST_HEAD(&tmp_list); + list_del_init(&rdata->list); + INIT_LIST_HEAD(&tmp_list); - rc = cifs_send_async_read(rdata->offset, + rc = cifs_send_async_read(rdata->offset, rdata->bytes, rdata->cfile, cifs_sb, &tmp_list); - list_splice(&tmp_list, &rdata_list); + list_splice(&tmp_list, &rdata_list); - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); - goto again; - } - } else { + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + goto again; + } else if (rdata->result) + rc = rdata->result; + else rc = cifs_readdata_to_iov(rdata, to); - } + /* if there was a short read -- discard anything left */ if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) rc = -ENODATA; From d913ed17f0a7d74e2847695bc920d77a33f2490b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 11:31:48 +0400 Subject: [PATCH 28/35] CIFS: Optimize cifs_user_read() in a short read case on reconnects by filling the output buffer with a data got from a partially received response and requesting the remaining data from the server. This is suitable for non-signed connections. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 6 ++++++ fs/cifs/file.c | 23 ++++++++++++++++++++--- fs/cifs/smb2pdu.c | 6 ++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 57d447c56d83..66f65001a6d8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1561,6 +1561,12 @@ cifs_readv_callback(struct mid_q_entry *mid) case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: rdata->result = -EAGAIN; + if (server->sign && rdata->got_bytes) + /* reset bytes number since we can not check a sign */ + rdata->got_bytes = 0; + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; default: rdata->result = -EIO; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e17012817d9d..5d2501df8f6b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3030,13 +3030,30 @@ again: else if (rdata->result == -EAGAIN) { /* resend call if it's a retryable error */ struct list_head tmp_list; + unsigned int got_bytes = rdata->got_bytes; list_del_init(&rdata->list); INIT_LIST_HEAD(&tmp_list); - rc = cifs_send_async_read(rdata->offset, - rdata->bytes, rdata->cfile, - cifs_sb, &tmp_list); + /* + * Got a part of data and then reconnect has + * happened -- fill the buffer and continue + * reading. + */ + if (got_bytes && got_bytes < rdata->bytes) { + rc = cifs_readdata_to_iov(rdata, to); + if (rc) { + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + continue; + } + } + + rc = cifs_send_async_read( + rdata->offset + got_bytes, + rdata->bytes - got_bytes, + rdata->cfile, cifs_sb, + &tmp_list); list_splice(&tmp_list, &rdata_list); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c66ae4183bd3..cde943d61fb6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1729,6 +1729,12 @@ smb2_readv_callback(struct mid_q_entry *mid) case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: rdata->result = -EAGAIN; + if (server->sign && rdata->got_bytes) + /* reset bytes number since we can not check a sign */ + rdata->got_bytes = 0; + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; default: if (rdata->result != -ENODATA) From b770ddfa26fcb22ac85fbcdd0f5e88e65a118e01 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jul 2014 11:31:53 +0400 Subject: [PATCH 29/35] CIFS: Optimize readpages in a short read case on reconnects by marking pages with a data from a partially received response up-to-date. This is suitable for non-signed connections. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5d2501df8f6b..12b64e02eee1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3281,25 +3281,30 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) static void cifs_readv_complete(struct work_struct *work) { - unsigned int i; + unsigned int i, got_bytes; struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); + got_bytes = rdata->got_bytes; for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; lru_cache_add_file(page); - if (rdata->result == 0) { + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) { flush_dcache_page(page); SetPageUptodate(page); } unlock_page(page); - if (rdata->result == 0) + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); + got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes); + page_cache_release(page); rdata->pages[i] = NULL; } From 21496687a79424572f46a84c690d331055f4866f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jul 2014 18:25:52 +0400 Subject: [PATCH 30/35] CIFS: Fix STATUS_CANNOT_DELETE error mapping for SMB2 The existing mapping causes unlink() call to return error after delete operation. Changing the mapping to -EACCES makes the client process the call like CIFS protocol does - reset dos attributes with ATTR_READONLY flag masked off and retry the operation. Cc: stable@vger.kernel.org Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2maperror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 94bd4fbb13d3..e31a9dfdcd39 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, - {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, + {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"}, {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, From 81691503b2090e5238586224af26a26973f6c254 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 14 Jul 2014 23:28:37 -0500 Subject: [PATCH 31/35] Update cifs version to 2.04 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 70f178a7c759..560480263336 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.03" +#define CIFS_VERSION "2.04" #endif /* _CIFSFS_H */ From 480e83275a0284a5fed75323aaa4b32547b14308 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 Aug 2014 15:46:22 -0500 Subject: [PATCH 32/35] Add Pavel to contributor list in cifs AUTHORS file Signed-off-by: Steve French CC: Pavel Shilovsky --- Documentation/filesystems/cifs/AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/filesystems/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS index ca4a67a0bb1e..c98800df677f 100644 --- a/Documentation/filesystems/cifs/AUTHORS +++ b/Documentation/filesystems/cifs/AUTHORS @@ -40,6 +40,7 @@ Gunter Kukkukk (testing and suggestions for support of old servers) Igor Mammedov (DFS support) Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) Scott Lovenberg +Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features) Test case and Bug Report contributors ------------------------------------- From 2075cf0b71bfffc4942e232f1487ac98bdfb7bf5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 Aug 2014 16:00:01 -0500 Subject: [PATCH 33/35] update CIFS TODO list Signed-off-by: Steve French --- Documentation/filesystems/cifs/TODO | 97 +++++++++++------------------ 1 file changed, 36 insertions(+), 61 deletions(-) diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO index 355abcdcda98..066ffddc3964 100644 --- a/Documentation/filesystems/cifs/TODO +++ b/Documentation/filesystems/cifs/TODO @@ -1,4 +1,4 @@ -Version 1.53 May 20, 2008 +Version 2.03 August 1, 2014 A Partial List of Missing Features ================================== @@ -7,63 +7,49 @@ Contributions are welcome. There are plenty of opportunities for visible, important contributions to this module. Here is a partial list of the known problems and missing features: -a) Support for SecurityDescriptors(Windows/CIFS ACLs) for chmod/chgrp/chown -so that these operations can be supported to Windows servers +a) SMB3 (and SMB3.02) missing optional features: + - RDMA + - multichannel (started) + - directory leases (improved metadata caching) + - T10 copy offload (copy chunk is only mechanism supported) + - encrypted shares -b) Mapping POSIX ACLs (and eventually NFSv4 ACLs) to CIFS -SecurityDescriptors +b) improved sparse file support -c) Better pam/winbind integration (e.g. to handle uid mapping -better) - -d) Cleanup now unneeded SessSetup code in -fs/cifs/connect.c and add back in NTLMSSP code if any servers -need it - -e) fix NTLMv2 signing when two mounts with different users to same -server. - -f) Directory entry caching relies on a 1 second timer, rather than +c) Directory entry caching relies on a 1 second timer, rather than using FindNotify or equivalent. - (started) -g) quota support (needs minor kernel change since quota calls +d) quota support (needs minor kernel change since quota calls to make it to network filesystems or deviceless filesystems) -h) investigate sync behavior (including syncpage) and check -for proper behavior of intr/nointr - -i) improve support for very old servers (OS/2 and Win9x for example) +e) improve support for very old servers (OS/2 and Win9x for example) Including support for changing the time remotely (utimes command). -j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the +f) hook lower into the sockets api (as NFS/SunRPC does) to avoid the extra copy in/out of the socket buffers in some cases. -k) Better optimize open (and pathbased setfilesize) to reduce the +g) Better optimize open (and pathbased setfilesize) to reduce the oplock breaks coming from windows srv. Piggyback identical file opens on top of each other by incrementing reference count rather than resending (helps reduce server resource utilization and avoid spurious oplock breaks). -l) Improve performance of readpages by sending more than one read -at a time when 8 pages or more are requested. In conjuntion -add support for async_cifs_readpages. - -m) Add support for storing symlink info to Windows servers +h) Add support for storing symlink info to Windows servers in the Extended Attribute format their SFU clients would recognize. -n) Finish fcntl D_NOTIFY support so kde and gnome file list windows +i) Finish inotify support so kde and gnome file list windows will autorefresh (partially complete by Asser). Needs minor kernel vfs change to support removing D_NOTIFY on a file. -o) Add GUI tool to configure /proc/fs/cifs settings and for display of +j) Add GUI tool to configure /proc/fs/cifs settings and for display of the CIFS statistics (started) -p) implement support for security and trusted categories of xattrs +k) implement support for security and trusted categories of xattrs (requires minor protocol extension) to enable better support for SELINUX -q) Implement O_DIRECT flag on open (already supported on mount) +l) Implement O_DIRECT flag on open (already supported on mount) -r) Create UID mapping facility so server UIDs can be mapped on a per +m) Create UID mapping facility so server UIDs can be mapped on a per mount or a per server basis to client UIDs or nobody if no mapping exists. This is helpful when Unix extensions are negotiated to allow better permission checking when UIDs differ on the server @@ -71,28 +57,29 @@ and client. Add new protocol request to the CIFS protocol standard for asking the server for the corresponding name of a particular uid. -s) Add support for CIFS Unix and also the newer POSIX extensions to the -server side for Samba 4. +n) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too) -t) In support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers) -need to add ability to set time to server (utimes command) +o) mount check for unmatched uids -u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too) +p) Add support for new vfs entry point for fallocate -v) mount check for unmatched uids +q) Add tools to take advantage of cifs/smb3 specific ioctls and features +such as "CopyChunk" (fast server side file copy) -w) Add support for new vfs entry point for fallocate +r) encrypted file support -x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of -processes can proceed better in parallel (on the server) +s) improved stats gathering, tools (perhaps integration with nfsometer?) -y) Fix Samba 3 to handle reads/writes over 127K (and remove the cifs mount -restriction of wsize max being 127K) +t) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed +file attribute via chflags) -KNOWN BUGS (updated April 24, 2007) +u) mount helper GUI (to simplify the various configuration options on mount) + + +KNOWN BUGS ==================================== See http://bugzilla.samba.org - search on product "CifsVFS" for -current bug list. +current bug list. Also check http://bugzilla.kernel.org (Product = File System, Component = CIFS) 1) existing symbolic links (Windows reparse points) are recognized but can not be created remotely. They are implemented for Samba and those that @@ -100,30 +87,18 @@ support the CIFS Unix extensions, although earlier versions of Samba overly restrict the pathnames. 2) follow_link and readdir code does not follow dfs junctions but recognizes them -3) create of new files to FAT partitions on Windows servers can -succeed but still return access denied (appears to be Windows -server not cifs client problem) and has not been reproduced recently. -NTFS partitions do not have this problem. -4) Unix/POSIX capabilities are reset after reconnection, and affect -a few fields in the tree connection but we do do not know which -superblocks to apply these changes to. We should probably walk -the list of superblocks to set these. Also need to check the -flags on the second mount to the same share, and see if we -can do the same trick that NFS does to remount duplicate shares. Misc testing to do ================== 1) check out max path names and max path name components against various server types. Try nested symlinks (8 deep). Return max path name in stat -f information -2) Modify file portion of ltp so it can run against a mounted network -share and run it against cifs vfs in automated fashion. +2) Improve xfstest's cifs enablement and adapt xfstests where needed to test +cifs better 3) Additional performance testing and optimization using iozone and similar - there are some easy changes that can be done to parallelize sequential writes, and when signing is disabled to request larger read sizes (larger than negotiated size) and send larger write sizes to modern servers. -4) More exhaustively test against less common servers. More testing -against Windows 9x, Windows ME servers. - +4) More exhaustively test against less common servers From 59b04c5df75bd715002bb535930ae1982a739269 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 2 Aug 2014 21:16:48 -0500 Subject: [PATCH 34/35] [CIFS] Fix incorrect hex vs. decimal in some debug print statements Joe Perches and Hans Wennborg noticed that various places in the kernel were printing decimal numbers with 0x prefix. printk("0x%d") or equivalent This fixes the instances of this in the cifs driver. CC: Hans Wennborg CC: Joe Perches Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 2 +- fs/cifs/misc.c | 4 ++-- fs/cifs/smb2misc.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index f3ac4154cbb6..44ec72684df5 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) tcon->nativeFileSystem); } seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" - "\n\tPathComponentMax: %d Status: 0x%d", + "\n\tPathComponentMax: %d Status: %d", le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 64997a04ab59..e65e17b3d484 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -423,7 +423,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) return true; } if (pSMBr->hdr.Status.CifsError) { - cifs_dbg(FYI, "notify err 0x%d\n", + cifs_dbg(FYI, "notify err 0x%x\n", pSMBr->hdr.Status.CifsError); return true; } @@ -450,7 +450,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (pSMB->hdr.WordCount != 8) return false; - cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n", + cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n", pSMB->LockType, pSMB->OplockLevel); if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index b8021fde987d..f2e6ac29a8d6 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -437,7 +437,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, continue; cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + cifs_dbg(FYI, "lease key match, lease break 0x%x\n", le32_to_cpu(rsp->NewLeaseState)); server->ops->set_oplock_level(cinode, lease_state, 0, NULL); @@ -467,7 +467,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, } cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + cifs_dbg(FYI, "lease key match, lease break 0x%x\n", le32_to_cpu(rsp->NewLeaseState)); open->oplock = lease_state; @@ -546,7 +546,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) return false; } - cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel); + cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel); /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); From f29ebb47d5bb59ef246966b047356c03629a9705 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 19 Jul 2014 21:44:58 -0500 Subject: [PATCH 35/35] Add worker function to set allocation size Adds setinfo worker function for SMB2/SMB3 support of SET_ALLOCATION_INFORMATION Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2inode.c | 2 +- fs/cifs/smb2ops.c | 3 ++- fs/cifs/smb2pdu.c | 10 +++++++--- fs/cifs/smb2proto.h | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 84c012a6aba0..0150182a4494 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -91,7 +91,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, case SMB2_OP_SET_EOF: tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, fid.volatile_fid, current->tgid, - (__le64 *)data); + (__le64 *)data, false); break; case SMB2_OP_SET_INFO: tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d0210a8e9829..77f8aeb9c2fc 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -19,6 +19,7 @@ #include #include +#include #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" @@ -736,7 +737,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, { __le64 eof = cpu_to_le64(size); return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, cfile->pid, &eof); + cfile->fid.volatile_fid, cfile->pid, &eof, false); } static int diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index cde943d61fb6..42ebc1a8be6c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2361,7 +2361,7 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 pid, __le64 *eof) + u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc) { struct smb2_file_eof_info info; void *data; @@ -2372,8 +2372,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, data = &info; size = sizeof(struct smb2_file_eof_info); - return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, - FILE_END_OF_FILE_INFORMATION, 1, &data, &size); + if (is_falloc) + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size); + else + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size); } int diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0ce48db20a65..67e8ce8055de 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -139,7 +139,7 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, __le16 *target_file); extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 pid, - __le64 *eof); + __le64 *eof, bool is_fallocate); extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf);