From 5245c924c2191ee0f39d8586a57178baba13dbf2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 16:56:18 +0200 Subject: [PATCH 01/24] s390/qdio: add helpers to manage qdio buffers Users of qdio buffers employ different strategies to manage these buffers. The qeth driver uses huge contiguous buffers which leads to high order allocations with all their downsides. This patch provides helpers to allocate, free, and reset arrays of qdio buffers using non contiguous pages. Reviewed-by: Martin Peschke Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 4 +++ drivers/s390/cio/qdio_setup.c | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index d786c634e052..06f3034605a1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -415,6 +415,10 @@ struct qdio_brinfo_entry_l2 { #define QDIO_FLAG_SYNC_OUTPUT 0x02 #define QDIO_FLAG_PCI_OUT 0x10 +int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count); + extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index f5f4a91fab44..f76bff68d1de 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -17,6 +17,8 @@ #include "qdio.h" #include "qdio_debug.h" +#define QBUFF_PER_PAGE (PAGE_SIZE / sizeof(struct qdio_buffer)) + static struct kmem_cache *qdio_q_cache; static struct kmem_cache *qdio_aob_cache; @@ -32,6 +34,57 @@ void qdio_release_aob(struct qaob *aob) } EXPORT_SYMBOL_GPL(qdio_release_aob); +/** + * qdio_free_buffers() - free qdio buffers + * @buf: array of pointers to qdio buffers + * @count: number of qdio buffers to free + */ +void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count) +{ + int pos; + + for (pos = 0; pos < count; pos += QBUFF_PER_PAGE) + free_page((unsigned long) buf[pos]); +} +EXPORT_SYMBOL_GPL(qdio_free_buffers); + +/** + * qdio_alloc_buffers() - allocate qdio buffers + * @buf: array of pointers to qdio buffers + * @count: number of qdio buffers to allocate + */ +int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count) +{ + int pos; + + for (pos = 0; pos < count; pos += QBUFF_PER_PAGE) { + buf[pos] = (void *) get_zeroed_page(GFP_KERNEL); + if (!buf[pos]) { + qdio_free_buffers(buf, count); + return -ENOMEM; + } + } + for (pos = 0; pos < count; pos++) + if (pos % QBUFF_PER_PAGE) + buf[pos] = buf[pos - 1] + 1; + return 0; +} +EXPORT_SYMBOL_GPL(qdio_alloc_buffers); + +/** + * qdio_reset_buffers() - reset qdio buffers + * @buf: array of pointers to qdio buffers + * @count: number of qdio buffers that will be zeroed + */ +void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count) +{ + int pos; + + for (pos = 0; pos < count; pos++) + memset(buf[pos], 0, sizeof(struct qdio_buffer)); +} +EXPORT_SYMBOL_GPL(qdio_reset_buffers); + /* * qebsm is only available under 64bit but the adapter sets the feature * flag anyway, so we manually override it. From 4601ba6c92b000dcda37c9d74587e3b88374c00c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 17:04:07 +0200 Subject: [PATCH 02/24] s390/qeth: qdio queue helpers Get rid of duplicated code by introducing some helpers to allocate and free qdio queues used by qeth for its input and completion queue. No functional change. Reviewed-by: Ursula Braun Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/net/qeth_core_main.c | 53 ++++++++++++++++--------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f54bec54d677..a0a6ad7a1739 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -292,6 +292,26 @@ int qeth_realloc_buffer_pool(struct qeth_card *card, int bufcnt) } EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool); +static void qeth_free_qdio_queue(struct qeth_qdio_q *q) +{ + kfree(q); +} + +static struct qeth_qdio_q *qeth_alloc_qdio_queue(void) +{ + struct qeth_qdio_q *q = kzalloc(sizeof(*q), GFP_KERNEL); + int i; + + if (!q) + return NULL; + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) + q->bufs[i].buffer = &q->qdio_bufs[i]; + + QETH_DBF_HEX(SETUP, 2, &q, sizeof(void *)); + return q; +} + static inline int qeth_cq_init(struct qeth_card *card) { int rc; @@ -323,21 +343,12 @@ static inline int qeth_alloc_cq(struct qeth_card *card) struct qdio_outbuf_state *outbuf_states; QETH_DBF_TEXT(SETUP, 2, "cqon"); - card->qdio.c_q = kzalloc(sizeof(struct qeth_qdio_q), - GFP_KERNEL); + card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { rc = -1; goto kmsg_out; } - QETH_DBF_HEX(SETUP, 2, &card->qdio.c_q, sizeof(void *)); - - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - card->qdio.c_q->bufs[i].buffer = - &card->qdio.c_q->qdio_bufs[i]; - } - card->qdio.no_in_queues = 2; - card->qdio.out_bufstates = kzalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q * @@ -361,7 +372,7 @@ static inline int qeth_alloc_cq(struct qeth_card *card) out: return rc; free_cq_out: - kfree(card->qdio.c_q); + qeth_free_qdio_queue(card->qdio.c_q); card->qdio.c_q = NULL; kmsg_out: dev_err(&card->gdev->dev, "Failed to create completion queue\n"); @@ -372,7 +383,7 @@ static inline void qeth_free_cq(struct qeth_card *card) { if (card->qdio.c_q) { --card->qdio.no_in_queues; - kfree(card->qdio.c_q); + qeth_free_qdio_queue(card->qdio.c_q); card->qdio.c_q = NULL; } kfree(card->qdio.out_bufstates); @@ -1296,7 +1307,7 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) if (card->qdio.in_q->bufs[j].rx_skb) dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb); } - kfree(card->qdio.in_q); + qeth_free_qdio_queue(card->qdio.in_q); card->qdio.in_q = NULL; /* inbound buffer pool */ qeth_free_buffer_pool(card); @@ -2422,19 +2433,11 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED) return 0; - card->qdio.in_q = kzalloc(sizeof(struct qeth_qdio_q), - GFP_KERNEL); + QETH_DBF_TEXT(SETUP, 2, "inq"); + card->qdio.in_q = qeth_alloc_qdio_queue(); if (!card->qdio.in_q) goto out_nomem; - QETH_DBF_TEXT(SETUP, 2, "inq"); - QETH_DBF_HEX(SETUP, 2, &card->qdio.in_q, sizeof(void *)); - memset(card->qdio.in_q, 0, sizeof(struct qeth_qdio_q)); - /* give inbound qeth_qdio_buffers their qdio_buffers */ - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - card->qdio.in_q->bufs[i].buffer = - &card->qdio.in_q->qdio_bufs[i]; - card->qdio.in_q->bufs[i].rx_skb = NULL; - } + /* inbound buffer pool */ if (qeth_alloc_buffer_pool(card)) goto out_freeinq; @@ -2484,7 +2487,7 @@ out_freeoutq: out_freepool: qeth_free_buffer_pool(card); out_freeinq: - kfree(card->qdio.in_q); + qeth_free_qdio_queue(card->qdio.in_q); card->qdio.in_q = NULL; out_nomem: atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); From 6d284bde2beef9d4d067281b08f86554f41de799 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 17:07:47 +0200 Subject: [PATCH 03/24] s390/qeth: extract qdio buffers from input buffer struct Because of the embedded qdio_buffer array struct qeth_qdio_q is quite large resulting in an order 4 allocation. This is likely to fail at runtime and wastes a lot of memory since the actual size is just about 36K. Since there is no need for this buffer to be contiguous split it up using qdio buffer helpers. Reported-by: Neale Ferguson Reviewed-by: Ursula Braun Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/net/qeth_core.h | 4 ++-- drivers/s390/net/qeth_core_main.c | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index a2088af51cc5..4f9a3180f663 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -439,10 +439,10 @@ struct qeth_qdio_buffer { }; struct qeth_qdio_q { - struct qdio_buffer qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; + struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_buffer bufs[QDIO_MAX_BUFFERS_PER_Q]; int next_buf_to_init; -} __attribute__ ((aligned(256))); +}; struct qeth_qdio_out_buffer { struct qdio_buffer *buffer; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a0a6ad7a1739..acedba8539e4 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -294,6 +294,10 @@ EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool); static void qeth_free_qdio_queue(struct qeth_qdio_q *q) { + if (!q) + return; + + qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -305,8 +309,13 @@ static struct qeth_qdio_q *qeth_alloc_qdio_queue(void) if (!q) return NULL; + if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) { + kfree(q); + return NULL; + } + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) - q->bufs[i].buffer = &q->qdio_bufs[i]; + q->bufs[i].buffer = q->qdio_bufs[i]; QETH_DBF_HEX(SETUP, 2, &q, sizeof(void *)); return q; @@ -318,8 +327,8 @@ static inline int qeth_cq_init(struct qeth_card *card) if (card->options.cq == QETH_CQ_ENABLED) { QETH_DBF_TEXT(SETUP, 2, "cqinit"); - memset(card->qdio.c_q->qdio_bufs, 0, - QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer)); + qdio_reset_buffers(card->qdio.c_q->qdio_bufs, + QDIO_MAX_BUFFERS_PER_Q); card->qdio.c_q->next_buf_to_init = 127; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, card->qdio.no_in_queues - 1, 0, @@ -2791,8 +2800,8 @@ int qeth_init_qdio_queues(struct qeth_card *card) QETH_DBF_TEXT(SETUP, 2, "initqdqs"); /* inbound queue */ - memset(card->qdio.in_q->qdio_bufs, 0, - QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer)); + qdio_reset_buffers(card->qdio.in_q->qdio_bufs, + QDIO_MAX_BUFFERS_PER_Q); qeth_initialize_working_pool_list(card); /*give only as many buffers to hardware as we have buffer pool entries*/ for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i) @@ -3533,7 +3542,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, for (i = first_element; i < first_element + count; ++i) { int bidx = i % QDIO_MAX_BUFFERS_PER_Q; - struct qdio_buffer *buffer = &cq->qdio_bufs[bidx]; + struct qdio_buffer *buffer = cq->qdio_bufs[bidx]; int e; e = 0; From d445a4e28c0ff740e946ae22860be85428814c39 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 17:09:25 +0200 Subject: [PATCH 04/24] s390/qeth: extract qdio buffers from output buffer struct Because of the embedded qdio_buffer array struct qeth_qdio_out_q is quite large resulting in an order 4 allocation. This is likely to fail at runtime and wastes a lot of memory since the actual size is just about 34K. Since there is no need for this buffer to be contiguous split it up using qdio buffer helpers. Reported-by: Neale Ferguson Reviewed-by: Ursula Braun Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/net/qeth_core.h | 4 +- drivers/s390/net/qeth_core_main.c | 91 +++++++++++++++++++------------ 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 4f9a3180f663..4b32608e79f9 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -465,7 +465,7 @@ enum qeth_out_q_states { }; struct qeth_qdio_out_q { - struct qdio_buffer qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; + struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qdio_outbuf_state *bufstates; /* convenience pointer */ int queue_no; @@ -483,7 +483,7 @@ struct qeth_qdio_out_q { atomic_t used_buffers; /* indicates whether PCI flag must be set (or if one is outstanding) */ atomic_t set_pci_flags_count; -} __attribute__ ((aligned(256))); +}; struct qeth_qdio_info { atomic_t state; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index acedba8539e4..bececacbc836 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1302,35 +1302,6 @@ static void qeth_free_buffer_pool(struct qeth_card *card) } } -static void qeth_free_qdio_buffers(struct qeth_card *card) -{ - int i, j; - - if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == - QETH_QDIO_UNINITIALIZED) - return; - - qeth_free_cq(card); - cancel_delayed_work_sync(&card->buffer_reclaim_work); - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { - if (card->qdio.in_q->bufs[j].rx_skb) - dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb); - } - qeth_free_qdio_queue(card->qdio.in_q); - card->qdio.in_q = NULL; - /* inbound buffer pool */ - qeth_free_buffer_pool(card); - /* free outbound qdio_qs */ - if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - kfree(card->qdio.out_qs[i]); - } - kfree(card->qdio.out_qs); - card->qdio.out_qs = NULL; - } -} - static void qeth_clean_channel(struct qeth_channel *channel) { int cnt; @@ -2412,7 +2383,7 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) rc = -ENOMEM; goto out; } - newbuf->buffer = &q->qdio_bufs[bidx]; + newbuf->buffer = q->qdio_bufs[bidx]; skb_queue_head_init(&newbuf->skb_list); lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key); newbuf->q = q; @@ -2431,6 +2402,28 @@ out: return rc; } +static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +{ + if (!q) + return; + + qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); + kfree(q); +} + +static struct qeth_qdio_out_q *qeth_alloc_qdio_out_buf(void) +{ + struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL); + + if (!q) + return NULL; + + if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) { + kfree(q); + return NULL; + } + return q; +} static int qeth_alloc_qdio_buffers(struct qeth_card *card) { @@ -2458,8 +2451,7 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) if (!card->qdio.out_qs) goto out_freepool; for (i = 0; i < card->qdio.no_out_queues; ++i) { - card->qdio.out_qs[i] = kzalloc(sizeof(struct qeth_qdio_out_q), - GFP_KERNEL); + card->qdio.out_qs[i] = qeth_alloc_qdio_out_buf(); if (!card->qdio.out_qs[i]) goto out_freeoutq; QETH_DBF_TEXT_(SETUP, 2, "outq %i", i); @@ -2488,7 +2480,7 @@ out_freeoutqbufs: } out_freeoutq: while (i > 0) { - kfree(card->qdio.out_qs[--i]); + qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); } kfree(card->qdio.out_qs); @@ -2503,6 +2495,35 @@ out_nomem: return -ENOMEM; } +static void qeth_free_qdio_buffers(struct qeth_card *card) +{ + int i, j; + + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == + QETH_QDIO_UNINITIALIZED) + return; + + qeth_free_cq(card); + cancel_delayed_work_sync(&card->buffer_reclaim_work); + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { + if (card->qdio.in_q->bufs[j].rx_skb) + dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb); + } + qeth_free_qdio_queue(card->qdio.in_q); + card->qdio.in_q = NULL; + /* inbound buffer pool */ + qeth_free_buffer_pool(card); + /* free outbound qdio_qs */ + if (card->qdio.out_qs) { + for (i = 0; i < card->qdio.no_out_queues; ++i) { + qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); + qeth_free_qdio_out_buf(card->qdio.out_qs[i]); + } + kfree(card->qdio.out_qs); + card->qdio.out_qs = NULL; + } +} + static void qeth_create_qib_param_field(struct qeth_card *card, char *param_field) { @@ -2823,8 +2844,8 @@ int qeth_init_qdio_queues(struct qeth_card *card) /* outbound queue */ for (i = 0; i < card->qdio.no_out_queues; ++i) { - memset(card->qdio.out_qs[i]->qdio_bufs, 0, - QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer)); + qdio_reset_buffers(card->qdio.out_qs[i]->qdio_bufs, + QDIO_MAX_BUFFERS_PER_Q); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { qeth_clear_output_buffer(card->qdio.out_qs[i], card->qdio.out_qs[i]->bufs[j], From 852eb1aa2bd342dede0126f50d4fde7d963db32e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 17:11:38 +0200 Subject: [PATCH 05/24] s390/zfcp: use qdio buffer helpers Use qdio buffer helpers to manage the buffers used for the request and response queues. No functional change. Reviewed-by: Steffen Maier Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_qdio.c | 49 +++++++++++++++-------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 06025cdaa4ad..495e1cb3afa6 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -14,27 +14,10 @@ #include "zfcp_ext.h" #include "zfcp_qdio.h" -#define QBUFF_PER_PAGE (PAGE_SIZE / sizeof(struct qdio_buffer)) - static bool enable_multibuffer = 1; module_param_named(datarouter, enable_multibuffer, bool, 0400); MODULE_PARM_DESC(datarouter, "Enable hardware data router support (default on)"); -static int zfcp_qdio_buffers_enqueue(struct qdio_buffer **sbal) -{ - int pos; - - for (pos = 0; pos < QDIO_MAX_BUFFERS_PER_Q; pos += QBUFF_PER_PAGE) { - sbal[pos] = (struct qdio_buffer *) get_zeroed_page(GFP_KERNEL); - if (!sbal[pos]) - return -ENOMEM; - } - for (pos = 0; pos < QDIO_MAX_BUFFERS_PER_Q; pos++) - if (pos % QBUFF_PER_PAGE) - sbal[pos] = sbal[pos - 1] + 1; - return 0; -} - static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *id, unsigned int qdio_err) { @@ -326,15 +309,30 @@ static void zfcp_qdio_setup_init_data(struct qdio_initialize *id, static int zfcp_qdio_allocate(struct zfcp_qdio *qdio) { struct qdio_initialize init_data; + int ret; - if (zfcp_qdio_buffers_enqueue(qdio->req_q) || - zfcp_qdio_buffers_enqueue(qdio->res_q)) + ret = qdio_alloc_buffers(qdio->req_q, QDIO_MAX_BUFFERS_PER_Q); + if (ret) return -ENOMEM; + ret = qdio_alloc_buffers(qdio->res_q, QDIO_MAX_BUFFERS_PER_Q); + if (ret) + goto free_req_q; + zfcp_qdio_setup_init_data(&init_data, qdio); init_waitqueue_head(&qdio->req_q_wq); - return qdio_allocate(&init_data); + ret = qdio_allocate(&init_data); + if (ret) + goto free_res_q; + + return 0; + +free_res_q: + qdio_free_buffers(qdio->res_q, QDIO_MAX_BUFFERS_PER_Q); +free_req_q: + qdio_free_buffers(qdio->req_q, QDIO_MAX_BUFFERS_PER_Q); + return ret; } /** @@ -448,19 +446,14 @@ failed_establish: void zfcp_qdio_destroy(struct zfcp_qdio *qdio) { - int p; - if (!qdio) return; if (qdio->adapter->ccw_device) qdio_free(qdio->adapter->ccw_device); - for (p = 0; p < QDIO_MAX_BUFFERS_PER_Q; p += QBUFF_PER_PAGE) { - free_page((unsigned long) qdio->req_q[p]); - free_page((unsigned long) qdio->res_q[p]); - } - + qdio_free_buffers(qdio->req_q, QDIO_MAX_BUFFERS_PER_Q); + qdio_free_buffers(qdio->res_q, QDIO_MAX_BUFFERS_PER_Q); kfree(qdio); } @@ -475,7 +468,7 @@ int zfcp_qdio_setup(struct zfcp_adapter *adapter) qdio->adapter = adapter; if (zfcp_qdio_allocate(qdio)) { - zfcp_qdio_destroy(qdio); + kfree(qdio); return -ENOMEM; } From d7309aaadd7e830651ba720e3ae7374e29baeb2d Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 26 Jun 2014 19:41:46 +0200 Subject: [PATCH 06/24] s390/dasd: remove unnecessary null test before debugfs_remove This fixes checkpatch warning: "WARNING: debugfs_remove(NULL) is safe this check is probably not required" Signed-off-by: Fabian Frederick Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 1eef0f586950..ea2729fbc86c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -283,8 +283,7 @@ static int dasd_state_basic_to_known(struct dasd_device *device) if (device->block) { dasd_profile_exit(&device->block->profile); - if (device->block->debugfs_dentry) - debugfs_remove(device->block->debugfs_dentry); + debugfs_remove(device->block->debugfs_dentry); dasd_gendisk_free(device->block); dasd_block_clear_timer(device->block); } @@ -293,9 +292,7 @@ static int dasd_state_basic_to_known(struct dasd_device *device) return rc; dasd_device_clear_timer(device); dasd_profile_exit(&device->profile); - if (device->debugfs_dentry) - debugfs_remove(device->debugfs_dentry); - + debugfs_remove(device->debugfs_dentry); DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device); if (device->debug_area != NULL) { debug_unregister(device->debug_area); @@ -1111,23 +1108,17 @@ static void dasd_profile_init(struct dasd_profile *profile, static void dasd_profile_exit(struct dasd_profile *profile) { dasd_profile_off(profile); - if (profile->dentry) { - debugfs_remove(profile->dentry); - profile->dentry = NULL; - } + debugfs_remove(profile->dentry); + profile->dentry = NULL; } static void dasd_statistics_removeroot(void) { dasd_global_profile_level = DASD_PROFILE_OFF; - if (dasd_global_profile_dentry) { - debugfs_remove(dasd_global_profile_dentry); - dasd_global_profile_dentry = NULL; - } - if (dasd_debugfs_global_entry) - debugfs_remove(dasd_debugfs_global_entry); - if (dasd_debugfs_root_entry) - debugfs_remove(dasd_debugfs_root_entry); + debugfs_remove(dasd_global_profile_dentry); + dasd_global_profile_dentry = NULL; + debugfs_remove(dasd_debugfs_global_entry); + debugfs_remove(dasd_debugfs_root_entry); } static void dasd_statistics_createroot(void) From 7048a2b532912467d776bbc157c80bbdb8795a85 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 26 Jun 2014 19:41:47 +0200 Subject: [PATCH 07/24] s390/dasd: Move EXPORT_SYMBOL after function/variable Fix checkpatch warnings: "WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable" Signed-off-by: Fabian Frederick Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 69 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ea2729fbc86c..32650fd0f2f1 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -42,8 +42,10 @@ * SECTION: exported variables of dasd.c */ debug_info_t *dasd_debug_area; +EXPORT_SYMBOL(dasd_debug_area); static struct dentry *dasd_debugfs_root_entry; struct dasd_discipline *dasd_diag_discipline_pointer; +EXPORT_SYMBOL(dasd_diag_discipline_pointer); void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); MODULE_AUTHOR("Holger Smolinski "); @@ -164,6 +166,7 @@ struct dasd_block *dasd_alloc_block(void) return block; } +EXPORT_SYMBOL_GPL(dasd_alloc_block); /* * Free memory of a device structure. @@ -172,6 +175,7 @@ void dasd_free_block(struct dasd_block *block) { kfree(block); } +EXPORT_SYMBOL_GPL(dasd_free_block); /* * Make a new device known to the system. @@ -576,6 +580,7 @@ void dasd_kick_device(struct dasd_device *device) /* queue call to dasd_kick_device to the kernel event daemon. */ schedule_work(&device->kick_work); } +EXPORT_SYMBOL(dasd_kick_device); /* * dasd_reload_device will schedule a call do do_reload_device to the kernel @@ -636,6 +641,7 @@ void dasd_set_target_state(struct dasd_device *device, int target) mutex_unlock(&device->state_mutex); dasd_put_device(device); } +EXPORT_SYMBOL(dasd_set_target_state); /* * Enable devices with device numbers in [from..to]. @@ -658,6 +664,7 @@ void dasd_enable_device(struct dasd_device *device) if (device->discipline->kick_validate) device->discipline->kick_validate(device); } +EXPORT_SYMBOL(dasd_enable_device); /* * SECTION: device operation (interrupt handler, start i/o, term i/o ...) @@ -1234,6 +1241,7 @@ struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, dasd_get_device(device); return cqr; } +EXPORT_SYMBOL(dasd_kmalloc_request); struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, int datasize, @@ -1273,6 +1281,7 @@ struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, dasd_get_device(device); return cqr; } +EXPORT_SYMBOL(dasd_smalloc_request); /* * Free memory of a channel program. This function needs to free all the @@ -1295,6 +1304,7 @@ void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) kfree(cqr); dasd_put_device(device); } +EXPORT_SYMBOL(dasd_kfree_request); void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) { @@ -1305,6 +1315,7 @@ void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) spin_unlock_irqrestore(&device->mem_lock, flags); dasd_put_device(device); } +EXPORT_SYMBOL(dasd_sfree_request); /* * Check discipline magic in cqr. @@ -1382,6 +1393,7 @@ int dasd_term_IO(struct dasd_ccw_req *cqr) dasd_schedule_device_bh(device); return rc; } +EXPORT_SYMBOL(dasd_term_IO); /* * Start the i/o. This start_IO can fail if the channel is really busy. @@ -1500,6 +1512,7 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) cqr->intrc = rc; return rc; } +EXPORT_SYMBOL(dasd_start_IO); /* * Timeout function for dasd devices. This is used for different purposes @@ -1532,6 +1545,7 @@ void dasd_device_set_timer(struct dasd_device *device, int expires) else mod_timer(&device->timer, jiffies + expires); } +EXPORT_SYMBOL(dasd_device_set_timer); /* * Clear timeout for a device. @@ -1540,6 +1554,7 @@ void dasd_device_clear_timer(struct dasd_device *device) { del_timer(&device->timer); } +EXPORT_SYMBOL(dasd_device_clear_timer); static void dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm) @@ -1592,6 +1607,7 @@ void dasd_generic_handle_state_change(struct dasd_device *device) if (device->block) dasd_schedule_block_bh(device->block); } +EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); /* * Interrupt handler for "normal" ssch-io based dasd devices. @@ -1713,6 +1729,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, dasd_device_clear_timer(device); dasd_schedule_device_bh(device); } +EXPORT_SYMBOL(dasd_int_handler); enum uc_todo dasd_generic_uc_handler(struct ccw_device *cdev, struct irb *irb) { @@ -1986,6 +2003,7 @@ finished: __dasd_device_process_final_queue(device, &flush_queue); return rc; } +EXPORT_SYMBOL_GPL(dasd_flush_device_queue); /* * Acquire the device lock and process queues for the device. @@ -2025,6 +2043,7 @@ void dasd_schedule_device_bh(struct dasd_device *device) dasd_get_device(device); tasklet_hi_schedule(&device->tasklet); } +EXPORT_SYMBOL(dasd_schedule_device_bh); void dasd_device_set_stop_bits(struct dasd_device *device, int bits) { @@ -2057,6 +2076,7 @@ void dasd_add_request_head(struct dasd_ccw_req *cqr) dasd_schedule_device_bh(device); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } +EXPORT_SYMBOL(dasd_add_request_head); /* * Queue a request to the tail of the device ccw_queue. @@ -2075,6 +2095,7 @@ void dasd_add_request_tail(struct dasd_ccw_req *cqr) dasd_schedule_device_bh(device); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } +EXPORT_SYMBOL(dasd_add_request_tail); /* * Wakeup helper for the 'sleep_on' functions. @@ -2300,6 +2321,7 @@ int dasd_sleep_on(struct dasd_ccw_req *cqr) { return _dasd_sleep_on(cqr, 0); } +EXPORT_SYMBOL(dasd_sleep_on); /* * Start requests from a ccw_queue and wait for their completion. @@ -2318,6 +2340,7 @@ int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) { return _dasd_sleep_on(cqr, 1); } +EXPORT_SYMBOL(dasd_sleep_on_interruptible); /* * Whoa nelly now it gets really hairy. For some functions (e.g. steal lock @@ -2392,6 +2415,7 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) return rc; } +EXPORT_SYMBOL(dasd_sleep_on_immediatly); /* * Cancels a request that was started with dasd_sleep_on_req. @@ -2433,6 +2457,7 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr) dasd_schedule_device_bh(device); return rc; } +EXPORT_SYMBOL(dasd_cancel_req); /* * SECTION: Operations of the dasd_block layer. @@ -2466,6 +2491,7 @@ void dasd_block_set_timer(struct dasd_block *block, int expires) else mod_timer(&block->timer, jiffies + expires); } +EXPORT_SYMBOL(dasd_block_set_timer); /* * Clear timeout for a dasd_block. @@ -2474,6 +2500,7 @@ void dasd_block_clear_timer(struct dasd_block *block) { del_timer(&block->timer); } +EXPORT_SYMBOL(dasd_block_clear_timer); /* * Process finished error recovery ccw. @@ -2855,6 +2882,7 @@ void dasd_schedule_block_bh(struct dasd_block *block) dasd_get_device(block->base); tasklet_hi_schedule(&block->tasklet); } +EXPORT_SYMBOL(dasd_schedule_block_bh); /* @@ -3225,6 +3253,7 @@ int dasd_generic_probe(struct ccw_device *cdev, async_schedule(dasd_generic_auto_online, cdev); return 0; } +EXPORT_SYMBOL_GPL(dasd_generic_probe); /* * This will one day be called from a global not_oper handler. @@ -3267,6 +3296,7 @@ void dasd_generic_remove(struct ccw_device *cdev) dasd_remove_sysfs_files(cdev); } +EXPORT_SYMBOL_GPL(dasd_generic_remove); /* * Activate a device. This is called from dasd_{eckd,fba}_probe() when either @@ -3339,6 +3369,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_put_device(device); return rc; } +EXPORT_SYMBOL_GPL(dasd_generic_set_online); int dasd_generic_set_offline(struct ccw_device *cdev) { @@ -3442,6 +3473,7 @@ interrupted: dasd_put_device(device); return rc; } +EXPORT_SYMBOL_GPL(dasd_generic_set_offline); int dasd_generic_last_path_gone(struct dasd_device *device) { @@ -3514,6 +3546,7 @@ int dasd_generic_notify(struct ccw_device *cdev, int event) dasd_put_device(device); return ret; } +EXPORT_SYMBOL_GPL(dasd_generic_notify); void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) { @@ -3863,39 +3896,3 @@ failed: module_init(dasd_init); module_exit(dasd_exit); - -EXPORT_SYMBOL(dasd_debug_area); -EXPORT_SYMBOL(dasd_diag_discipline_pointer); - -EXPORT_SYMBOL(dasd_add_request_head); -EXPORT_SYMBOL(dasd_add_request_tail); -EXPORT_SYMBOL(dasd_cancel_req); -EXPORT_SYMBOL(dasd_device_clear_timer); -EXPORT_SYMBOL(dasd_block_clear_timer); -EXPORT_SYMBOL(dasd_enable_device); -EXPORT_SYMBOL(dasd_int_handler); -EXPORT_SYMBOL(dasd_kfree_request); -EXPORT_SYMBOL(dasd_kick_device); -EXPORT_SYMBOL(dasd_kmalloc_request); -EXPORT_SYMBOL(dasd_schedule_device_bh); -EXPORT_SYMBOL(dasd_schedule_block_bh); -EXPORT_SYMBOL(dasd_set_target_state); -EXPORT_SYMBOL(dasd_device_set_timer); -EXPORT_SYMBOL(dasd_block_set_timer); -EXPORT_SYMBOL(dasd_sfree_request); -EXPORT_SYMBOL(dasd_sleep_on); -EXPORT_SYMBOL(dasd_sleep_on_immediatly); -EXPORT_SYMBOL(dasd_sleep_on_interruptible); -EXPORT_SYMBOL(dasd_smalloc_request); -EXPORT_SYMBOL(dasd_start_IO); -EXPORT_SYMBOL(dasd_term_IO); - -EXPORT_SYMBOL_GPL(dasd_generic_probe); -EXPORT_SYMBOL_GPL(dasd_generic_remove); -EXPORT_SYMBOL_GPL(dasd_generic_notify); -EXPORT_SYMBOL_GPL(dasd_generic_set_online); -EXPORT_SYMBOL_GPL(dasd_generic_set_offline); -EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); -EXPORT_SYMBOL_GPL(dasd_flush_device_queue); -EXPORT_SYMBOL_GPL(dasd_alloc_block); -EXPORT_SYMBOL_GPL(dasd_free_block); From 4ce259664e6a502bf9641c90ac5ec049cae15e2d Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 26 Jun 2014 19:41:48 +0200 Subject: [PATCH 08/24] s390/dasd: replace pr_warning by pr_warn Fix checkpatch warning: "WARNING: Prefer pr_warn(... to pr_warning(..." + coalesce formats. Signed-off-by: Fabian Frederick Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 32650fd0f2f1..baae088d81e2 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3221,8 +3221,8 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) ret = ccw_device_set_online(cdev); if (ret) - pr_warning("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + pr_warn("%s: Setting the DASD online failed with rc=%d\n", + dev_name(&cdev->dev), ret); } /* @@ -3319,9 +3319,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, discipline = base_discipline; if (device->features & DASD_FEATURE_USEDIAG) { if (!dasd_diag_discipline_pointer) { - pr_warning("%s Setting the DASD online failed because " - "of missing DIAG discipline\n", - dev_name(&cdev->dev)); + pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n", + dev_name(&cdev->dev)); dasd_delete_device(device); return -ENODEV; } @@ -3342,9 +3341,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* check_device will allocate block device if necessary */ rc = discipline->check_device(device); if (rc) { - pr_warning("%s Setting the DASD online with discipline %s " - "failed with rc=%i\n", - dev_name(&cdev->dev), discipline->name, rc); + pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n", + dev_name(&cdev->dev), discipline->name, rc); module_put(discipline->owner); module_put(base_discipline->owner); dasd_delete_device(device); @@ -3353,8 +3351,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_set_target_state(device, DASD_STATE_ONLINE); if (device->state <= DASD_STATE_KNOWN) { - pr_warning("%s Setting the DASD online failed because of a " - "missing discipline\n", dev_name(&cdev->dev)); + pr_warn("%s Setting the DASD online failed because of a missing discipline\n", + dev_name(&cdev->dev)); rc = -ENODEV; dasd_set_target_state(device, DASD_STATE_NEW); if (device->block) @@ -3393,13 +3391,11 @@ int dasd_generic_set_offline(struct ccw_device *cdev) open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) - pr_warning("%s: The DASD cannot be set offline " - "with open count %i\n", - dev_name(&cdev->dev), open_count); + pr_warn("%s: The DASD cannot be set offline with open count %i\n", + dev_name(&cdev->dev), open_count); else - pr_warning("%s: The DASD cannot be set offline " - "while it is in use\n", - dev_name(&cdev->dev)); + pr_warn("%s: The DASD cannot be set offline while it is in use\n", + dev_name(&cdev->dev)); clear_bit(DASD_FLAG_OFFLINE, &device->flags); dasd_put_device(device); return -EBUSY; From c794caf9cbc9d9dccc2d85828a242db5b8592a2a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 26 Jun 2014 19:41:49 +0200 Subject: [PATCH 09/24] s390/dasd: replace seq_printf by seq_puts Fix checkpatch warnings: "WARNING: Prefer seq_puts to seq_printf" Signed-off-by: Fabian Frederick Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index baae088d81e2..1e5062690926 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -976,37 +976,37 @@ static void dasd_stats_seq_print(struct seq_file *m, seq_printf(m, "total_sectors %u\n", data->dasd_io_sects); seq_printf(m, "total_pav %u\n", data->dasd_io_alias); seq_printf(m, "total_hpf %u\n", data->dasd_io_tpm); - seq_printf(m, "histogram_sectors "); + seq_puts(m, "histogram_sectors "); dasd_stats_array(m, data->dasd_io_secs); - seq_printf(m, "histogram_io_times "); + seq_puts(m, "histogram_io_times "); dasd_stats_array(m, data->dasd_io_times); - seq_printf(m, "histogram_io_times_weighted "); + seq_puts(m, "histogram_io_times_weighted "); dasd_stats_array(m, data->dasd_io_timps); - seq_printf(m, "histogram_time_build_to_ssch "); + seq_puts(m, "histogram_time_build_to_ssch "); dasd_stats_array(m, data->dasd_io_time1); - seq_printf(m, "histogram_time_ssch_to_irq "); + seq_puts(m, "histogram_time_ssch_to_irq "); dasd_stats_array(m, data->dasd_io_time2); - seq_printf(m, "histogram_time_ssch_to_irq_weighted "); + seq_puts(m, "histogram_time_ssch_to_irq_weighted "); dasd_stats_array(m, data->dasd_io_time2ps); - seq_printf(m, "histogram_time_irq_to_end "); + seq_puts(m, "histogram_time_irq_to_end "); dasd_stats_array(m, data->dasd_io_time3); - seq_printf(m, "histogram_ccw_queue_length "); + seq_puts(m, "histogram_ccw_queue_length "); dasd_stats_array(m, data->dasd_io_nr_req); seq_printf(m, "total_read_requests %u\n", data->dasd_read_reqs); seq_printf(m, "total_read_sectors %u\n", data->dasd_read_sects); seq_printf(m, "total_read_pav %u\n", data->dasd_read_alias); seq_printf(m, "total_read_hpf %u\n", data->dasd_read_tpm); - seq_printf(m, "histogram_read_sectors "); + seq_puts(m, "histogram_read_sectors "); dasd_stats_array(m, data->dasd_read_secs); - seq_printf(m, "histogram_read_times "); + seq_puts(m, "histogram_read_times "); dasd_stats_array(m, data->dasd_read_times); - seq_printf(m, "histogram_read_time_build_to_ssch "); + seq_puts(m, "histogram_read_time_build_to_ssch "); dasd_stats_array(m, data->dasd_read_time1); - seq_printf(m, "histogram_read_time_ssch_to_irq "); + seq_puts(m, "histogram_read_time_ssch_to_irq "); dasd_stats_array(m, data->dasd_read_time2); - seq_printf(m, "histogram_read_time_irq_to_end "); + seq_puts(m, "histogram_read_time_irq_to_end "); dasd_stats_array(m, data->dasd_read_time3); - seq_printf(m, "histogram_read_ccw_queue_length "); + seq_puts(m, "histogram_read_ccw_queue_length "); dasd_stats_array(m, data->dasd_read_nr_req); } @@ -1020,7 +1020,7 @@ static int dasd_stats_show(struct seq_file *m, void *v) data = profile->data; if (!data) { spin_unlock_bh(&profile->lock); - seq_printf(m, "disabled\n"); + seq_puts(m, "disabled\n"); return 0; } dasd_stats_seq_print(m, data); @@ -1073,7 +1073,7 @@ static ssize_t dasd_stats_global_write(struct file *file, static int dasd_stats_global_show(struct seq_file *m, void *v) { if (!dasd_global_profile_level) { - seq_printf(m, "disabled\n"); + seq_puts(m, "disabled\n"); return 0; } dasd_stats_seq_print(m, &dasd_global_profile_data); @@ -1176,7 +1176,7 @@ static void dasd_statistics_removeroot(void) int dasd_stats_generic_show(struct seq_file *m, void *v) { - seq_printf(m, "Statistics are not activated in this kernel\n"); + seq_puts(m, "Statistics are not activated in this kernel\n"); return 0; } From a8fa3943500ca89076bab20f63565fbf8ad58257 Mon Sep 17 00:00:00 2001 From: Philipp Hachtmann Date: Wed, 9 Jul 2014 09:36:55 +0200 Subject: [PATCH 10/24] s390/watchdog: Fix module name in Kconfig help text The help text for DIAG288_WATCHDOG in drivers/watchdog/Kconfig still mentioned the misleading old module name vmwatchdog. This patch changes that to the correct new name diag288_wdt. Signed-off-by: Philipp Hachtmann Signed-off-by: Martin Schwidefsky --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 76dd54122f76..f57312fced80 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1293,7 +1293,7 @@ config DIAG288_WATCHDOG both. To compile this driver as a module, choose M here. The module - will be called vmwatchdog. + will be called diag288_wdt. # SUPERH (sh + sh64) Architecture From bd858e84d4a179c3030dccba56efb6a93e455c10 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 10 Jul 2014 18:14:20 +0200 Subject: [PATCH 11/24] s390/kdump: Return NOTIFY_OK for all actions other than MEM_GOING_OFFLINE We only have to check kdump memory for the MEM_GOING_OFFLINE action. Therefore skip the test and return NOTIFY_OK for all other memory hotplug actions. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1e2264b46e4c..ae1d5be7dd88 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -501,6 +501,8 @@ static int kdump_mem_notifier(struct notifier_block *nb, { struct memory_notify *arg = data; + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; if (arg->start_pfn > PFN_DOWN(crashk_res.end)) From 896cb7e635ec562cd9f2dc98dea193727a50eade Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 16 Jul 2014 17:21:01 +0200 Subject: [PATCH 12/24] s390/pci: fix kmsg component KMSG_COMPONENT has to be defined instead of COMPONENT. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 4 ++-- arch/s390/pci/pci_clp.c | 4 ++-- arch/s390/pci/pci_debug.c | 4 ++-- arch/s390/pci/pci_event.c | 4 ++-- arch/s390/pci/pci_sysfs.c | 4 ++-- drivers/pci/hotplug/s390_pci_hpc.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 30de42730b2f..2fa7b14b9c08 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -15,8 +15,8 @@ * Thomas Klein */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 96545d7659fd..6e22a247de9b 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c5c66840ac00..eec598c5939f 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 6d7f5a3016ca..460fdb21cf61 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 9190214b8702..fa3ce891e597 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index d1332d2f8730..d77e46bca54c 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -7,8 +7,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI hpc" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include From daa991bf30cbea85dc10560e6932406677ed5d5c Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 18 Jul 2014 14:19:25 +0200 Subject: [PATCH 13/24] dasd: use aliases for formatted devices during format Formatting of a previously formatted device is slower than newly format a device when alias devices are available. For already formatted devices the alias devices are not used for formatting. Fix the alias handling for already formatted devices. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 11 ++++++----- drivers/s390/block/dasd_eckd.c | 4 ++-- drivers/s390/block/dasd_int.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 1e5062690926..5e8442c88e29 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -285,6 +285,12 @@ static int dasd_state_basic_to_known(struct dasd_device *device) { int rc; + if (device->discipline->basic_to_known) { + rc = device->discipline->basic_to_known(device); + if (rc) + return rc; + } + if (device->block) { dasd_profile_exit(&device->block->profile); debugfs_remove(device->block->debugfs_dentry); @@ -375,11 +381,6 @@ static int dasd_state_ready_to_basic(struct dasd_device *device) { int rc; - if (device->discipline->ready_to_basic) { - rc = device->discipline->ready_to_basic(device); - if (rc) - return rc; - } device->state = DASD_STATE_BASIC; if (device->block) { struct dasd_block *block = device->block; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 2e8e0755070b..39748fda6e1f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2039,7 +2039,7 @@ static int dasd_eckd_online_to_ready(struct dasd_device *device) return 0; }; -static int dasd_eckd_ready_to_basic(struct dasd_device *device) +static int dasd_eckd_basic_to_known(struct dasd_device *device) { return dasd_alias_remove_device(device); }; @@ -4511,7 +4511,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .verify_path = dasd_eckd_verify_path, .basic_to_ready = dasd_eckd_basic_to_ready, .online_to_ready = dasd_eckd_online_to_ready, - .ready_to_basic = dasd_eckd_ready_to_basic, + .basic_to_known = dasd_eckd_basic_to_known, .fill_geometry = dasd_eckd_fill_geometry, .start_IO = dasd_start_IO, .term_IO = dasd_term_IO, diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 690001af0d09..7a14582dc8a4 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -304,7 +304,7 @@ struct dasd_discipline { */ int (*basic_to_ready) (struct dasd_device *); int (*online_to_ready) (struct dasd_device *); - int (*ready_to_basic) (struct dasd_device *); + int (*basic_to_known)(struct dasd_device *); /* (struct dasd_device *); * Device operation functions. build_cp creates a ccw chain for From 931a3dce819d2dedc731011c6fd7e6bb5aa188ef Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 18 Jul 2014 14:22:41 +0200 Subject: [PATCH 14/24] dasd: fix unresponsive device during format If path events occur the formatting process stucks because path events may flush format requests from the queue. Kick the format process after path events are handled. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 5e8442c88e29..9242d7cff9b5 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1675,8 +1675,11 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cqr->status == DASD_CQR_CLEAR_PENDING && scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) { cqr->status = DASD_CQR_CLEARED; + if (cqr->callback_data == DASD_SLEEPON_START_TAG) + cqr->callback_data = DASD_SLEEPON_END_TAG; dasd_device_clear_timer(device); wake_up(&dasd_flush_wq); + wake_up(&generic_waitq); dasd_schedule_device_bh(device); return; } @@ -2439,6 +2442,8 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr) case DASD_CQR_QUEUED: /* request was not started - just set to cleared */ cqr->status = DASD_CQR_CLEARED; + if (cqr->callback_data == DASD_SLEEPON_START_TAG) + cqr->callback_data = DASD_SLEEPON_END_TAG; break; case DASD_CQR_IN_IO: /* request in IO - terminate IO and release again */ @@ -3512,6 +3517,10 @@ int dasd_generic_path_operational(struct dasd_device *device) dasd_schedule_device_bh(device); if (device->block) dasd_schedule_block_bh(device->block); + + if (!device->stopped) + wake_up(&generic_waitq); + return 1; } EXPORT_SYMBOL_GPL(dasd_generic_path_operational); From 8fa56aed12f1b0a2828da52280e2efbbf1163ad5 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 18 Jul 2014 14:24:20 +0200 Subject: [PATCH 15/24] dasd: fix list_del corruption during format If I/O errors occur during format a kernel panic with a list_del corruption may occur. Stop error recovery procedure after an erp action was taken. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 9242d7cff9b5..925b056dc2b3 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2307,8 +2307,12 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { - if (__dasd_sleep_on_erp(cqr)) + if (__dasd_sleep_on_erp(cqr)) { + if (!cqr->status == DASD_CQR_TERMINATED && + !cqr->status == DASD_CQR_NEED_ERP) + break; rc = 1; + } } if (rc) goto retry; From 29b8dd9d4274bca6526e4bb8d4f46dec1f4c15c9 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 18 Jul 2014 14:26:01 +0200 Subject: [PATCH 16/24] dasd: fix error recovery for alias devices during format Kernel panic or a hanging device during format if an alias device is set offline or I/O errors occur. Omit the error recovery procedure for alias devices and do retries on the base device with full erp. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 22 ++++++++++++++++------ drivers/s390/block/dasd_eckd.c | 26 +++++++++++++++++--------- drivers/s390/block/dasd_int.h | 3 ++- drivers/s390/block/dasd_ioctl.c | 31 ++++++++++++++++++++++++++----- 4 files changed, 61 insertions(+), 21 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 925b056dc2b3..5df05f26b7d9 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2307,17 +2307,27 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { - if (__dasd_sleep_on_erp(cqr)) { - if (!cqr->status == DASD_CQR_TERMINATED && - !cqr->status == DASD_CQR_NEED_ERP) - break; - rc = 1; + /* + * for alias devices simplify error recovery and + * return to upper layer + */ + if (cqr->startdev != cqr->basedev && + (cqr->status == DASD_CQR_TERMINATED || + cqr->status == DASD_CQR_NEED_ERP)) + return -EAGAIN; + else { + /* normal recovery for basedev IO */ + if (__dasd_sleep_on_erp(cqr)) { + if (!cqr->status == DASD_CQR_TERMINATED && + !cqr->status == DASD_CQR_NEED_ERP) + break; + rc = 1; + } } } if (rc) goto retry; - return 0; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 39748fda6e1f..e74e5f7b431d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2061,11 +2061,12 @@ dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo) static struct dasd_ccw_req * dasd_eckd_build_format(struct dasd_device *base, - struct format_data_t *fdata) + struct format_data_t *fdata, + int enable_PAV) { struct dasd_eckd_private *base_priv; struct dasd_eckd_private *start_priv; - struct dasd_device *startdev; + struct dasd_device *startdev = NULL; struct dasd_ccw_req *fcp; struct eckd_count *ect; struct ch_t address; @@ -2079,7 +2080,9 @@ dasd_eckd_build_format(struct dasd_device *base, int nr_tracks; int use_prefix; - startdev = dasd_alias_get_start_dev(base); + if (enable_PAV) + startdev = dasd_alias_get_start_dev(base); + if (!startdev) startdev = base; @@ -2309,6 +2312,7 @@ dasd_eckd_build_format(struct dasd_device *base, fcp->startdev = startdev; fcp->memdev = startdev; + fcp->basedev = base; fcp->retries = 256; fcp->expires = startdev->default_expires * HZ; fcp->buildclk = get_tod_clock(); @@ -2319,7 +2323,8 @@ dasd_eckd_build_format(struct dasd_device *base, static int dasd_eckd_format_device(struct dasd_device *base, - struct format_data_t *fdata) + struct format_data_t *fdata, + int enable_PAV) { struct dasd_ccw_req *cqr, *n; struct dasd_block *block; @@ -2327,7 +2332,7 @@ dasd_eckd_format_device(struct dasd_device *base, struct list_head format_queue; struct dasd_device *device; int old_stop, format_step; - int step, rc = 0; + int step, rc = 0, sleep_rc; block = base->block; private = (struct dasd_eckd_private *) base->private; @@ -2361,11 +2366,11 @@ dasd_eckd_format_device(struct dasd_device *base, } INIT_LIST_HEAD(&format_queue); - old_stop = fdata->stop_unit; + old_stop = fdata->stop_unit; while (fdata->start_unit <= 1) { fdata->stop_unit = fdata->start_unit; - cqr = dasd_eckd_build_format(base, fdata); + cqr = dasd_eckd_build_format(base, fdata, enable_PAV); list_add(&cqr->blocklist, &format_queue); fdata->stop_unit = old_stop; @@ -2383,7 +2388,7 @@ retry: if (step > format_step) fdata->stop_unit = fdata->start_unit + format_step - 1; - cqr = dasd_eckd_build_format(base, fdata); + cqr = dasd_eckd_build_format(base, fdata, enable_PAV); if (IS_ERR(cqr)) { if (PTR_ERR(cqr) == -ENOMEM) { /* @@ -2403,7 +2408,7 @@ retry: } sleep: - dasd_sleep_on_queue(&format_queue); + sleep_rc = dasd_sleep_on_queue(&format_queue); list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { device = cqr->startdev; @@ -2415,6 +2420,9 @@ sleep: private->count--; } + if (sleep_rc) + return sleep_rc; + /* * in case of ENOMEM we need to retry after * first requests are finished diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 7a14582dc8a4..2cc188c8203f 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -175,6 +175,7 @@ struct dasd_ccw_req { struct dasd_block *block; /* the originating block device */ struct dasd_device *memdev; /* the device used to allocate this */ struct dasd_device *startdev; /* device the request is started on */ + struct dasd_device *basedev; /* base device if no block->base */ void *cpaddr; /* address of ccw or tcw */ unsigned char cpmode; /* 0 = cmd mode, 1 = itcw */ char status; /* status of this request */ @@ -321,7 +322,7 @@ struct dasd_discipline { int (*term_IO) (struct dasd_ccw_req *); void (*handle_terminated_request) (struct dasd_ccw_req *); int (*format_device) (struct dasd_device *, - struct format_data_t *); + struct format_data_t *, int enable_PAV); int (*free_cp) (struct dasd_ccw_req *, struct request *); /* diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 25a0f2f8b0b9..ad62608e4175 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -203,7 +203,9 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) { struct dasd_device *base; - int rc; + int enable_PAV = 1; + int rc, retries; + int start, stop; base = block->base; if (base->discipline->format_device == NULL) @@ -231,11 +233,30 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) bdput(bdev); } - rc = base->discipline->format_device(base, fdata); - if (rc) - return rc; + retries = 255; + /* backup start- and endtrack for retries */ + start = fdata->start_unit; + stop = fdata->stop_unit; + do { + rc = base->discipline->format_device(base, fdata, enable_PAV); + if (rc) { + if (rc == -EAGAIN) { + retries--; + /* disable PAV in case of errors */ + enable_PAV = 0; + fdata->start_unit = start; + fdata->stop_unit = stop; + } else + return rc; + } else + /* success */ + break; + } while (retries); - return 0; + if (!retries) + return -EIO; + else + return 0; } /* From c60d1ae4efcb5790f7d085369baf66c167a6484f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 18 Jul 2014 17:37:08 +0200 Subject: [PATCH 17/24] s390/pci: introduce lazy IOTLB flushing for DMA unmap This changes the default IOTLB flushing method to lazy flushing, which means that there will be no direct flush after each DMA unmap operation. Instead, the iommu bitmap pointer will be adjusted after unmap, so that no DMA address will be re-used until after an iommu bitmap wrap-around. The only IOTLB flush will then happen after each wrap-around. A new kernel parameter "s390_iommu=" is also introduced, to allow changing the flushing behaviour to the old strict method. Reviewed-by: Sebastian Ott Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 7 ++++ arch/s390/pci/pci_dma.c | 50 +++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b7fa2f599459..c848095f2cb0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3023,6 +3023,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. S [KNL] Run init in single mode + s390_iommu= [HW,S390] + Set s390 IOTLB flushing mode + strict + With strict flushing every unmap operation will result in + an IOTLB flush. Default is lazy flushing before reuse, + which is faster. + sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f91c03119804..4cbb29a4d615 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -16,6 +16,13 @@ static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; +static int s390_iommu_strict; + +static int zpci_refresh_global(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, + zdev->iommu_pages * PAGE_SIZE); +} static unsigned long *dma_alloc_cpu_table(void) { @@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, } /* - * rpcit is not required to establish new translations when previously - * invalid translation-table entries are validated, however it is - * required when altering previously valid entries. + * With zdev->tlb_refresh == 0, rpcit is not required to establish new + * translations when previously invalid translation-table entries are + * validated. With lazy unmap, it also is skipped for previously valid + * entries, but a global rpcit is then required before any address can + * be re-used, i.e. after each iommu bitmap wrap-around. */ if (!zdev->tlb_refresh && - ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - /* - * TODO: also need to check that the old entry is indeed INVALID - * and not only for one page but for the whole range... - * -> now we WARN_ON in that case but with lazy unmap that - * needs to be redone! - */ + (!s390_iommu_strict || + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) goto no_refresh; rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, @@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) { unsigned long offset, flags; + int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); - if (offset == -1) + if (offset == -1) { + /* wrap-around */ offset = __dma_alloc_iommu(zdev, 0, size); + wrap = 1; + } if (offset != -1) { zdev->next_bit = offset + size; - if (zdev->next_bit >= zdev->iommu_pages) - zdev->next_bit = 0; + if (!zdev->tlb_refresh && !s390_iommu_strict && wrap) + /* global flush after wrap-around with lazy unmap */ + zpci_refresh_global(zdev); } spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); return offset; @@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size if (!zdev->iommu_bitmap) goto out; bitmap_clear(zdev->iommu_bitmap, offset, size); - if (offset >= zdev->next_bit) + /* + * Lazy flush for unmap: need to move next_bit to avoid address re-use + * until wrap-around. + */ + if (!s390_iommu_strict && offset >= zdev->next_bit) zdev->next_bit = offset + size; out: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); @@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = { /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_dma_ops); + +static int __init s390_iommu_setup(char *str) +{ + if (!strncmp(str, "strict", 6)) + s390_iommu_strict = 1; + return 0; +} + +__setup("s390_iommu=", s390_iommu_setup); From dc295880c6752076f8b94ba3885d0bfff09e3e82 Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Tue, 22 Jul 2014 16:50:57 +0200 Subject: [PATCH 18/24] s390/seccomp: fix error return for filtered system calls The syscall_set_return_value function of s390 negates the error argument before storing the value to the return register gpr2. This is incorrect, the seccomp code already passes the negative error value. Store the unmodified error value to gpr2. Signed-off-by: Jan Willeke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index abad78d5b10c..5bc12598ae9e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->gprs[2] = error ? -error : val; + regs->gprs[2] = error ? error : val; } static inline void syscall_get_arguments(struct task_struct *task, From e2213e04c1b1e44a09a9d05b79809b7e63c9217e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 22 Jul 2014 16:58:52 +0200 Subject: [PATCH 19/24] s390/irq: improve displayed interrupt order in /proc/interrupts Rework the irqclass_main_desc and irqclass_sub_desc data structures which are used to report detaild IRQ statistics in /proc/interrupts. When called from the procfs ops, the entries in the structures are processed one by one. The index of an IRQ in the structures is identical to its definition in the "enum interruption_class". To control and (re)order the displayed sequence, introduce an irq member in each entry. This helps to display related IRQs together without changing the assigned number in the interruption_class enumeration. That means, adding and displaying new IRQs are independent. Finally, this new behavior improves to maintain a kernel ABI. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/irq.c | 95 ++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 99b0b09646ca..8eb82443cfbd 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -30,6 +30,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); struct irq_class { + int irq; char *name; char *desc; }; @@ -45,9 +46,9 @@ struct irq_class { * up with having a sum which accounts each interrupt twice. */ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { - [EXT_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"}, - [THIN_INTERRUPT] = {.name = "AIO"}, + {.irq = EXT_INTERRUPT, .name = "EXT"}, + {.irq = IO_INTERRUPT, .name = "I/O"}, + {.irq = THIN_INTERRUPT, .name = "AIO"}, }; /* @@ -56,38 +57,38 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { * In addition this list contains non external / I/O events like NMIs. */ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { - [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, - [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, - [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, - [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, - [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, - [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, - [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, - [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, - [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, - [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, - [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, - [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, - [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, - [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, - [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, - [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, - [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, - [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, - [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, - [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, - [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, - [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, - [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, - [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, - [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, - [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, - [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, - [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, - [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, - [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, - [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, - [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, + {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"}, + {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"}, + {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"}, + {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"}, + {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"}, + {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"}, + {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"}, + {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"}, + {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, + {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, + {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, + {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, + {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, + {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, + {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, + {.irq = IRQIO_CLW, .name = "CLW", .desc = "[I/O] CLAW"}, + {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"}, + {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, + {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" }, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, + {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, + {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; void __init init_IRQ(void) @@ -116,33 +117,37 @@ void do_IRQ(struct pt_regs *regs, int irq) */ int show_interrupts(struct seq_file *p, void *v) { - int irq = *(loff_t *) v; - int cpu; + int index = *(loff_t *) v; + int cpu, irq; get_online_cpus(); - if (irq == 0) { + if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); goto out; } - if (irq < NR_IRQS) { - if (irq >= NR_IRQS_BASE) + if (index < NR_IRQS) { + if (index >= NR_IRQS_BASE) goto out; - seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + /* Adjust index to process irqclass_main_desc array entries */ + index--; + seq_printf(p, "%s: ", irqclass_main_desc[index].name); + irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); goto out; } - for (irq = 0; irq < NR_ARCH_IRQS; irq++) { - seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for (index = 0; index < NR_ARCH_IRQS; index++) { + seq_printf(p, "%s: ", irqclass_sub_desc[index].name); + irq = irqclass_sub_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); - if (irqclass_sub_desc[irq].desc) - seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + if (irqclass_sub_desc[index].desc) + seq_printf(p, " %s", irqclass_sub_desc[index].desc); seq_putc(p, '\n'); } out: From 26d766c60f4ea08cd14f0f3435a6db3d6cc2ae96 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 15 Jul 2014 17:53:12 +0200 Subject: [PATCH 20/24] s390/3215: fix hanging console issue The ccw_device_start in raw3215_start_io can fail. raw3215_try_io does not check if the request could be started and removes any pending timer. This can leave the system in a hanging state. Check for pending request after raw3215_start_io and start a timer if necessary. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3215.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 5af7f0bd6125..a6d47e5eee9e 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -288,12 +288,16 @@ static void raw3215_timeout(unsigned long __data) unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - if (!(raw->port.flags & ASYNC_SUSPENDED)) { - raw3215_mk_write_req(raw); - raw3215_start_io(raw); + raw->flags &= ~RAW3215_TIMER_RUNS; + if (!(raw->port.flags & ASYNC_SUSPENDED)) { + raw3215_mk_write_req(raw); + raw3215_start_io(raw); + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; } } spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); @@ -317,17 +321,15 @@ static inline void raw3215_try_io(struct raw3215_info *raw) (raw->flags & RAW3215_FLUSHING)) { /* execute write requests bigger than minimum size */ raw3215_start_io(raw); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - } - } else if (!(raw->flags & RAW3215_TIMER_RUNS)) { - /* delay small writes */ - raw->timer.expires = RAW3215_TIMEOUT + jiffies; - add_timer(&raw->timer); - raw->flags |= RAW3215_TIMER_RUNS; } } + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; + } } /* From a94fa154291bb11b1537ac2ff1ae2fd28428e054 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 28 Jul 2014 10:16:35 +0200 Subject: [PATCH 21/24] s390/dasd: fix camel case Rename enable_PAV to enable_pav. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 10 +++++----- drivers/s390/block/dasd_int.h | 2 +- drivers/s390/block/dasd_ioctl.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e74e5f7b431d..51dea7baf02c 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2062,7 +2062,7 @@ dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo) static struct dasd_ccw_req * dasd_eckd_build_format(struct dasd_device *base, struct format_data_t *fdata, - int enable_PAV) + int enable_pav) { struct dasd_eckd_private *base_priv; struct dasd_eckd_private *start_priv; @@ -2080,7 +2080,7 @@ dasd_eckd_build_format(struct dasd_device *base, int nr_tracks; int use_prefix; - if (enable_PAV) + if (enable_pav) startdev = dasd_alias_get_start_dev(base); if (!startdev) @@ -2324,7 +2324,7 @@ dasd_eckd_build_format(struct dasd_device *base, static int dasd_eckd_format_device(struct dasd_device *base, struct format_data_t *fdata, - int enable_PAV) + int enable_pav) { struct dasd_ccw_req *cqr, *n; struct dasd_block *block; @@ -2370,7 +2370,7 @@ dasd_eckd_format_device(struct dasd_device *base, old_stop = fdata->stop_unit; while (fdata->start_unit <= 1) { fdata->stop_unit = fdata->start_unit; - cqr = dasd_eckd_build_format(base, fdata, enable_PAV); + cqr = dasd_eckd_build_format(base, fdata, enable_pav); list_add(&cqr->blocklist, &format_queue); fdata->stop_unit = old_stop; @@ -2388,7 +2388,7 @@ retry: if (step > format_step) fdata->stop_unit = fdata->start_unit + format_step - 1; - cqr = dasd_eckd_build_format(base, fdata, enable_PAV); + cqr = dasd_eckd_build_format(base, fdata, enable_pav); if (IS_ERR(cqr)) { if (PTR_ERR(cqr) == -ENOMEM) { /* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 2cc188c8203f..c20170166909 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -322,7 +322,7 @@ struct dasd_discipline { int (*term_IO) (struct dasd_ccw_req *); void (*handle_terminated_request) (struct dasd_ccw_req *); int (*format_device) (struct dasd_device *, - struct format_data_t *, int enable_PAV); + struct format_data_t *, int enable_pav); int (*free_cp) (struct dasd_ccw_req *, struct request *); /* diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index ad62608e4175..02837d0ad942 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -203,7 +203,7 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) { struct dasd_device *base; - int enable_PAV = 1; + int enable_pav = 1; int rc, retries; int start, stop; @@ -238,12 +238,12 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) start = fdata->start_unit; stop = fdata->stop_unit; do { - rc = base->discipline->format_device(base, fdata, enable_PAV); + rc = base->discipline->format_device(base, fdata, enable_pav); if (rc) { if (rc == -EAGAIN) { retries--; /* disable PAV in case of errors */ - enable_PAV = 0; + enable_pav = 0; fdata->start_unit = start; fdata->stop_unit = stop; } else From 55e4283c3eb1d850893f645dd695c9c75d5fa1fc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2014 14:23:29 +0200 Subject: [PATCH 22/24] KVM: s390/mm: Fix page table locking vs. split pmd lock commit ec66ad66a0de87866be347b5ecc83bd46427f53b (s390/mm: enable split page table lock for PMD level) activated the split pmd lock for s390. Turns out that we missed one place: We also have to take the pmd lock instead of the page table lock when we reallocate the page tables (==> changing entries in the PMD) during sie enablement. Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 37b8241ec784..f90ad8592b36 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1279,6 +1279,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, { unsigned long next, *table, *new; struct page *page; + spinlock_t *ptl; pmd_t *pmd; pmd = pmd_offset(pud, addr); @@ -1296,7 +1297,7 @@ again: if (!new) return -ENOMEM; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (likely((unsigned long *) pmd_deref(*pmd) == table)) { /* Nuke pmd entry pointing to the "short" page table */ pmdp_flush_lazy(mm, addr, pmd); @@ -1310,7 +1311,7 @@ again: page_table_free_rcu(tlb, table); new = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (new) { page_table_free_pgste(new); goto again; From 152125b7a882df36a55a8eadbea6d0edf1461ee7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 24 Jul 2014 11:03:41 +0200 Subject: [PATCH 23/24] s390/mm: implement dirty bits for large segment table entries The large segment table entry format has block of bits for the ACC/F values for the large page. These bits are valid only if another bit (AV bit 0x10000) of the segment table entry is set. The ACC/F bits do not have a meaning if the AV bit is off. This allows to put the THP splitting bit, the segment young bit and the new segment dirty bit into the ACC/F bits as long as the AV bit stays off. The dirty and young information is only available if the pmd is large. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 197 ++++++++++++++++++-------------- arch/s390/mm/hugetlbpage.c | 103 ++++++++--------- arch/s390/mm/pgtable.c | 3 + 3 files changed, 159 insertions(+), 144 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fcba5e03839f..b76317c1f3eb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -287,7 +287,14 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT + +#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */ +#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */ +#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */ +#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */ +#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */ +#define _SEGMENT_ENTRY_BITS_LARGE 0 +#define _SEGMENT_ENTRY_ORIGIN_LARGE 0 #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) @@ -350,7 +357,7 @@ extern unsigned long MODULES_END; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ @@ -359,30 +366,34 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY (0) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) -#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ -#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ -#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG +#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ +#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ +#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ +#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ +#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ +#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * ..R...I...y. - * prot-none, old ..0...1...1. - * prot-none, young ..1...1...1. - * read-only, old ..1...1...0. - * read-only, young ..1...0...1. - * read-write, old ..0...1...0. - * read-write, young ..0...0...1. + * dy..R...I...wr + * prot-none, clean, old 00..1...1...00 + * prot-none, clean, young 01..1...1...00 + * prot-none, dirty, old 10..1...1...00 + * prot-none, dirty, young 11..1...1...00 + * read-only, clean, old 00..1...1...01 + * read-only, clean, young 01..1...0...01 + * read-only, dirty, old 10..1...1...01 + * read-only, dirty, young 11..1...0...01 + * read-write, clean, old 00..1...1...11 + * read-write, clean, young 01..1...0...11 + * read-write, dirty, old 10..0...1...11 + * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) */ -#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ - -/* Set of bits not changed in pmd_modify */ -#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ - | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) +#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf000000000000000UL @@ -455,10 +466,11 @@ extern unsigned long MODULES_END; * Segment entry (large page) protection definitions. */ #define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ - _SEGMENT_ENTRY_NONE) -#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \ + _SEGMENT_ENTRY_READ) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -569,25 +581,23 @@ static inline int pmd_none(pmd_t pmd) static inline int pmd_large(pmd_t pmd) { -#ifdef CONFIG_64BIT return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -#else - return 0; -#endif } -static inline int pmd_prot_none(pmd_t pmd) +static inline int pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && - (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } static inline int pmd_bad(pmd_t pmd) { -#ifdef CONFIG_64BIT if (pmd_large(pmd)) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; -#endif return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -607,20 +617,22 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - if (pmd_prot_none(pmd)) - return 0; - return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + int dirty = 1; + if (pmd_large(pmd)) + dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; + return dirty; } static inline int pmd_young(pmd_t pmd) { - int young = 0; -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) - young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; - else + int young = 1; + if (pmd_large(pmd)) young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; -#endif return young; } @@ -1391,7 +1403,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -1413,41 +1425,75 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) return pgprot_val(SEGMENT_WRITE); } +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + return pmd; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY; + if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } + return pmd; +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - } else { + if (pmd_large(pmd)) { pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - } else { + if (pmd_large(pmd)) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - int young; - - young = pmd_young(pmd); - pmd_val(pmd) &= _SEGMENT_CHG_MASK; + if (pmd_large(pmd)) { + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | + _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + return pmd; + } + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= massage_pgprot_pmd(newprot); - if (young) - pmd = pmd_mkyoung(pmd); return pmd; } @@ -1455,16 +1501,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return pmd_mkyoung(__pmd); + return __pmd; } -static inline pmd_t pmd_mkwrite(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! */ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - return pmd; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ static inline void __pmdp_csp(pmd_t *pmdp) @@ -1555,34 +1594,21 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) && + (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } static inline pmd_t pmd_mkhuge(pmd_t pmd) { pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; -} - -static inline pmd_t pmd_wrprotect(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! */ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; -} - -static inline pmd_t pmd_mkdirty(pmd_t pmd) -{ - /* No dirty bit in the segment table entry. */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1647,11 +1673,6 @@ static inline int has_transparent_hugepage(void) { return MACHINE_HAS_HPAGE ? 1 : 0; } - -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return pmd_val(pmd) >> PAGE_SHIFT; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 0ff66a7e29bb..389bc17934b7 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -10,42 +10,33 @@ static inline pmd_t __pte_to_pmd(pte_t pte) { - int none, young, prot; pmd_t pmd; /* - * Convert encoding pte bits pmd bits - * .IR...wrdytp ..R...I...y. - * empty .10...000000 -> ..0...1...0. - * prot-none, clean, old .11...000001 -> ..0...1...1. - * prot-none, clean, young .11...000101 -> ..1...1...1. - * prot-none, dirty, old .10...001001 -> ..0...1...1. - * prot-none, dirty, young .10...001101 -> ..1...1...1. - * read-only, clean, old .11...010001 -> ..1...1...0. - * read-only, clean, young .01...010101 -> ..1...0...1. - * read-only, dirty, old .11...011001 -> ..1...1...0. - * read-only, dirty, young .01...011101 -> ..1...0...1. - * read-write, clean, old .11...110001 -> ..0...1...0. - * read-write, clean, young .01...110101 -> ..0...0...1. - * read-write, dirty, old .10...111001 -> ..0...1...0. - * read-write, dirty, young .00...111101 -> ..0...0...1. - * Huge ptes are dirty by definition, a clean pte is made dirty - * by the conversion. + * Convert encoding pte bits pmd bits + * .IR...wrdytp dy..R...I...wr + * empty .10...000000 -> 00..0...1...00 + * prot-none, clean, old .11...000001 -> 00..1...1...00 + * prot-none, clean, young .11...000101 -> 01..1...1...00 + * prot-none, dirty, old .10...001001 -> 10..1...1...00 + * prot-none, dirty, young .10...001101 -> 11..1...1...00 + * read-only, clean, old .11...010001 -> 00..1...1...01 + * read-only, clean, young .01...010101 -> 01..1...0...01 + * read-only, dirty, old .11...011001 -> 10..1...1...01 + * read-only, dirty, young .01...011101 -> 11..1...0...01 + * read-write, clean, old .11...110001 -> 00..0...1...11 + * read-write, clean, young .01...110101 -> 01..0...0...11 + * read-write, dirty, old .10...111001 -> 10..0...1...11 + * read-write, dirty, young .00...111101 -> 11..0...0...11 */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - if (pte_val(pte) & _PAGE_INVALID) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - none = (pte_val(pte) & _PAGE_PRESENT) && - !(pte_val(pte) & _PAGE_READ) && - !(pte_val(pte) & _PAGE_WRITE); - prot = (pte_val(pte) & _PAGE_PROTECT) && - !(pte_val(pte) & _PAGE_WRITE); - young = pte_val(pte) & _PAGE_YOUNG; - if (none || young) - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - if (prot || (none && young)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; return pmd; @@ -56,34 +47,31 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_t pte; /* - * Convert encoding pmd bits pte bits - * ..R...I...y. .IR...wrdytp - * empty ..0...1...0. -> .10...000000 - * prot-none, old ..0...1...1. -> .10...001001 - * prot-none, young ..1...1...1. -> .10...001101 - * read-only, old ..1...1...0. -> .11...011001 - * read-only, young ..1...0...1. -> .01...011101 - * read-write, old ..0...1...0. -> .10...111001 - * read-write, young ..0...0...1. -> .00...111101 - * Huge ptes are dirty by definition + * Convert encoding pmd bits pte bits + * dy..R...I...wr .IR...wrdytp + * empty 00..0...1...00 -> .10...001100 + * prot-none, clean, old 00..0...1...00 -> .10...000001 + * prot-none, clean, young 01..0...1...00 -> .10...000101 + * prot-none, dirty, old 10..0...1...00 -> .10...001001 + * prot-none, dirty, young 11..0...1...00 -> .10...001101 + * read-only, clean, old 00..1...1...01 -> .11...010001 + * read-only, clean, young 01..1...1...01 -> .11...010101 + * read-only, dirty, old 10..1...1...01 -> .11...011001 + * read-only, dirty, young 11..1...1...01 -> .11...011101 + * read-write, clean, old 00..0...1...11 -> .10...110001 + * read-write, clean, young 01..0...1...11 -> .10...110101 + * read-write, dirty, old 10..0...1...11 -> .10...111001 + * read-write, dirty, young 11..0...1...11 -> .10...111101 */ if (pmd_present(pmd)) { - pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | - (pmd_val(pmd) & PAGE_MASK); - if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) - pte_val(pte) |= _PAGE_INVALID; - if (pmd_prot_none(pmd)) { - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_YOUNG; - } else { - pte_val(pte) |= _PAGE_READ; - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_WRITE; - if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) - pte_val(pte) |= _PAGE_YOUNG; - } + pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -96,6 +84,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { + /* Emulated huge ptes loose the dirty and young bit */ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else @@ -113,6 +102,8 @@ pte_t huge_ptep_get(pte_t *ptep) origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= *(unsigned long *) origin; + /* Emulated huge ptes are young and dirty by definition */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY; } return __pmd_to_pte(pmd); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f90ad8592b36..19daa53a3da4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1433,6 +1433,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, { VM_BUG_ON(address & ~HPAGE_PMD_MASK); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); if (pmd_same(*pmdp, entry)) return 0; pmdp_invalidate(vma, address, pmdp); From 36e7fdaa1a04fcf65b864232e1af56a51c7814d6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Aug 2014 09:57:51 +0200 Subject: [PATCH 24/24] s390/locking: Reenable optimistic spinning commit 4badad352a6bb202ec68afa7a574c0bb961e5ebc (locking/mutex: Disable optimistic spinning on some architectures) fenced spinning for architectures without proper cmpxchg. There is no need to disable mutex spinning on s390, though: The instructions CS,CSG and friends provide the proper guarantees. (We dont implement cmpxchg with locks). Signed-off-by: Christian Borntraeger Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bb63499fc5d3..9f00f9301613 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT