From 8759fec4af222f338d08f8f1a7ad6a77ca6cb301 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Wed, 14 Dec 2016 15:15:07 +0100 Subject: [PATCH 01/57] crypto: marvell - Copy IVDIG before launching partial DMA ahash requests Currently, inner IV/DIGEST data are only copied once into the hash engines and not set explicitly before launching a request that is not a first frag. This is an issue especially when multiple ahash reqs are computed in parallel or chained with cipher request, as the state of the request being computed is not updated into the hash engine. It leads to non-deterministic corrupted digest results. Fixes: commit 2786cee8e50b ("crypto: marvell - Move SRAM I/O operations to step functions") Signed-off-by: Romain Perier Acked-by: Boris Brezillon Cc: Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.h | 3 ++- drivers/crypto/marvell/hash.c | 34 +++++++++++++++++++++++++++++++++- drivers/crypto/marvell/tdma.c | 9 ++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index a768da7138a1..b7872f62f674 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -273,7 +273,8 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_SRC_IN_SRAM BIT(30) #define CESA_TDMA_END_OF_REQ BIT(29) #define CESA_TDMA_BREAK_CHAIN BIT(28) -#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 2a9260559654..585c90f9f606 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -281,13 +281,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) sreq->offset = 0; } +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. */ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + static void mv_cesa_ahash_step(struct crypto_async_request *req) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->base); + mv_cesa_ahash_dma_step(ahashreq); else mv_cesa_ahash_std_step(ahashreq); } @@ -585,12 +604,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; + bool set_state = false; int ret; u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); @@ -684,6 +707,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (type != CESA_TDMA_RESULT) basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. 
+ */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4416b88eca70..c76375ff376d 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, last->next = dreq->chain.first; engine->chain.last = dreq->chain.last; - if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) last->next_dma = dreq->chain.first->cur_dma; } } From b1227d019fa98c43381ad8827baf7efbe2923ed1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2016 18:00:24 -0800 Subject: [PATCH 02/57] ipvlan: fix various issues in ipvlan_process_multicast() 1) netif_rx() / dev_forward_skb() should not be called from process context. 2) ipvlan_count_rx() should be called with preemption disabled. 3) We should check if ipvlan->dev is up before feeding packets to netif_rx() 4) We need to prevent device from disappearing if some packets are in the multicast backlog. 5) One kfree_skb() should be a consume_skb() eventually Fixes: ba35f8588f47 ("ipvlan: Defer multicast / broadcast processing to a work-queue") Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 38 +++++++++++++++++++++----------- drivers/net/ipvlan/ipvlan_main.c | 7 +++++- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b4e990743e1d..ea6bc1e12cdf 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -207,6 +207,9 @@ void ipvlan_process_multicast(struct work_struct *work) spin_unlock_bh(&port->backlog.lock); while ((skb = __skb_dequeue(&list)) != NULL) { + struct net_device *dev = skb->dev; + bool consumed = false; + ethh = eth_hdr(skb); hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); mac_hash = ipvlan_mac_hash(ethh->h_dest); @@ -219,27 +222,29 @@ void ipvlan_process_multicast(struct work_struct *work) dlocal = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (hlocal && (ipvlan->dev == skb->dev)) { + if (hlocal && (ipvlan->dev == dev)) { dlocal = true; continue; } if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; - + if (!(ipvlan->dev->flags & IFF_UP)) + continue; ret = NET_RX_DROP; len = skb->len + ETH_HLEN; nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - goto acct; - - nskb->pkt_type = pkt_type; - nskb->dev = ipvlan->dev; - if (hlocal) - ret = dev_forward_skb(ipvlan->dev, nskb); - else - ret = netif_rx(nskb); -acct: + local_bh_disable(); + if (nskb) { + consumed = true; + nskb->pkt_type = pkt_type; + nskb->dev = ipvlan->dev; + if (hlocal) + ret = dev_forward_skb(ipvlan->dev, nskb); + else + ret = netif_rx(nskb); + } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + local_bh_enable(); } rcu_read_unlock(); @@ -249,8 +254,13 @@ acct: skb->pkt_type = pkt_type; dev_queue_xmit(skb); } else { - kfree_skb(skb); + if (consumed) + consume_skb(skb); + else + kfree_skb(skb); } + if (dev) + dev_put(dev); } } @@ -479,6 +489,8 @@ static void ipvlan_multicast_enqueue(struct ipvl_port *port, spin_lock(&port->backlog.lock); 
 	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+		if (skb->dev)
+			dev_hold(skb->dev);
 		__skb_queue_tail(&port->backlog, skb);
 		spin_unlock(&port->backlog.lock);
 		schedule_work(&port->wq);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 693ec5b66222..8b0f99300cbc 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -135,6 +135,7 @@ err:
 static void ipvlan_port_destroy(struct net_device *dev)
 {
 	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
+	struct sk_buff *skb;
 
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 	if (port->mode == IPVLAN_MODE_L3S) {
@@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
 	}
 	netdev_rx_handler_unregister(dev);
 	cancel_work_sync(&port->wq);
-	__skb_queue_purge(&port->backlog);
+	while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
+		if (skb->dev)
+			dev_put(skb->dev);
+		kfree_skb(skb);
+	}
 	kfree(port);
 }
 
From e252536068efd1578c6e23e7323527c5e6e980bd Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar
Date: Wed, 21 Dec 2016 17:30:16 -0800
Subject: [PATCH 03/57] ipvlan: fix multicast processing

In an IPvlan setup when master is set in loopback mode e.g.
ethtool -K eth0 set loopback on
where eth0 is master device for IPvlan setup.

The failure is caused by the faulty logic that determines if the
packet is from TX-path vs. RX-path by just looking at the mac-
addresses on the packet while processing multicast packets. In the
loopback-mode where this crash was happening, the packets that are
sent out are reflected by the NIC and are processed on the RX path,
but mac-address check tricks into thinking this packet is from TX
path and falsely uses dev_forward_skb() to pass packets to the slave
(virtual) devices.

This patch records the path while queueing packets and eliminates
logic of looking at mac-addresses for the same decision.

------------[ cut here ]------------
kernel BUG at include/linux/skbuff.h:1737!
Call Trace:
 [] dev_forward_skb+0x92/0xd0
 [] ipvlan_process_multicast+0x395/0x4c0 [ipvlan]
 [] ? ipvlan_process_multicast+0xd7/0x4c0 [ipvlan]
 [] ? process_one_work+0x147/0x660
 [] process_one_work+0x1a9/0x660
 [] ? process_one_work+0x147/0x660
 [] worker_thread+0x11d/0x360
 [] ? rescuer_thread+0x350/0x350
 [] kthread+0xdb/0xe0
 [] ? _raw_spin_unlock_irq+0x30/0x50
 [] ? flush_kthread_worker+0xc0/0xc0
 [] ret_from_fork+0x9a/0xd0
 [] ? flush_kthread_worker+0xc0/0xc0

Fixes: ba35f8588f47 ("ipvlan: Defer multicast / broadcast processing to a work-queue")
Signed-off-by: Mahesh Bandewar
CC: Eric Dumazet
Signed-off-by: David S.
Miller --- drivers/net/ipvlan/ipvlan.h | 5 +++++ drivers/net/ipvlan/ipvlan_core.c | 26 +++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 031093e1c25f..dbfbb33ac66c 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -99,6 +99,11 @@ struct ipvl_port { int count; }; +struct ipvl_skb_cb { + bool tx_pkt; +}; +#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0])) + static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) { return rcu_dereference(d->rx_handler_data); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index ea6bc1e12cdf..83ce74acf82d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work) unsigned int mac_hash; int ret; u8 pkt_type; - bool hlocal, dlocal; + bool tx_pkt; __skb_queue_head_init(&list); @@ -211,7 +211,7 @@ void ipvlan_process_multicast(struct work_struct *work) bool consumed = false; ethh = eth_hdr(skb); - hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); + tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; mac_hash = ipvlan_mac_hash(ethh->h_dest); if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) @@ -219,13 +219,10 @@ void ipvlan_process_multicast(struct work_struct *work) else pkt_type = PACKET_MULTICAST; - dlocal = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (hlocal && (ipvlan->dev == dev)) { - dlocal = true; + if (tx_pkt && (ipvlan->dev == skb->dev)) continue; - } if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; if (!(ipvlan->dev->flags & IFF_UP)) @@ -238,7 +235,7 @@ void ipvlan_process_multicast(struct work_struct *work) consumed = true; nskb->pkt_type = pkt_type; nskb->dev = ipvlan->dev; - if (hlocal) + if (tx_pkt) ret = dev_forward_skb(ipvlan->dev, nskb); else ret = netif_rx(nskb); @@ -248,7 +245,7 @@ void ipvlan_process_multicast(struct work_struct *work) } rcu_read_unlock(); - if (dlocal) { + if (tx_pkt) { /* If the packet originated here, send it out. */ skb->dev = port->dev; skb->pkt_type = pkt_type; @@ -480,13 +477,20 @@ out: } static void ipvlan_multicast_enqueue(struct ipvl_port *port, - struct sk_buff *skb) + struct sk_buff *skb, bool tx_pkt) { if (skb->protocol == htons(ETH_P_PAUSE)) { kfree_skb(skb); return; } + /* Record that the deferred packet is from TX or RX path. By + * looking at mac-addresses on packet will lead to erronus decisions. + * (This would be true for a loopback-mode on master device or a + * hair-pin mode of the switch.) 
+ */ + IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; + spin_lock(&port->backlog.lock); if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { if (skb->dev) @@ -549,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) } else if (is_multicast_ether_addr(eth->h_dest)) { ipvlan_skb_crossing_ns(skb, NULL); - ipvlan_multicast_enqueue(ipvlan->port, skb); + ipvlan_multicast_enqueue(ipvlan->port, skb, true); return NET_XMIT_SUCCESS; } @@ -646,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, */ if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); - ipvlan_multicast_enqueue(port, nskb); + ipvlan_multicast_enqueue(port, nskb, false); } } } else { From 693c56491fb720087437a635e6eaf440659b922f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Dec 2016 07:22:29 -0500 Subject: [PATCH 04/57] tipc: don't send FIN message from connectionless socket In commit 6f00089c7372 ("tipc: remove SS_DISCONNECTING state") the check for socket type is in the wrong place, causing a closing socket to always send out a FIN message even when the socket was never connected. This is normally harmless, since the destination node for such messages most often is zero, and the message will be dropped, but it is still a wrong and confusing behavior. We fix this in this commit. Reviewed-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 333c5dae0072..800caaa699a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error) while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (TIPC_SKB_CB(skb)->bytes_read) { kfree_skb(skb); - } else { - if (!tipc_sk_type_connectionless(sk) && - sk->sk_state != TIPC_DISCONNECTING) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, error); + continue; } + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + tipc_sk_respond(sk, skb, error); } + + if (tipc_sk_type_connectionless(sk)) + return; + if (sk->sk_state != TIPC_DISCONNECTING) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, @@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error) tsk->portid, error); if (skb) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - if (!tipc_sk_type_connectionless(sk)) { - tipc_node_remove_conn(net, dnode, tsk->portid); - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - } + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); } } From eb9def61be7197669cab51f43789b53aa7a69509 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 22 Dec 2016 14:32:58 +0200 Subject: [PATCH 05/57] net/mlx4_en: Fix user prio field in XDP forward The user prio field is wrong (and overflows) in the XDP forward flow. This is a result of a bad value for num_tx_rings_p_up, which should account all XDP TX rings, as they operate for the same user prio. Signed-off-by: Tariq Toukan Reported-by: Martin KaFai Lau Signed-off-by: David S. 
Miller
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index bcd955339058..edbe200ac2fa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev)
 
 	/* Configure tx cq's and rings */
 	for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
-		u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
+		u8 num_tx_rings_p_up = t == TX ?
+			priv->num_tx_rings_p_up : priv->tx_ring_num[t];
 
 		for (i = 0; i < priv->tx_ring_num[t]; i++) {
 			/* Configure cq */

From 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 21 Dec 2016 18:04:11 +0100
Subject: [PATCH 06/57] net, sched: fix soft lockup in tc_classify

Shahar reported a soft lockup in tc_classify(), where we run into an
endless loop when walking the classifier chain due to tp->next == tp
which is a state we should never run into. The issue only seems to
trigger under load in the tc control path.

What happens is that in tc_ctl_tfilter(), thread A allocates a new tp,
initializes it, sets tp_created to 1, and calls into tp->ops->change()
with it. In that classifier callback we had to unlock/lock the rtnl
mutex and returned with -EAGAIN. One reason why we need to drop there
is, for example, that we need to request an action module to be loaded.

This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning
after we loaded and found the requested action, we need to redo the
whole request so we don't race against others. While we had to unlock
rtnl in that time, thread B's request was processed next on that CPU.
Thread B added a new tp instance successfully to the classifier chain.
When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN
and destroying its tp instance which never got linked, we goto replay
and redo A's request.

This time when walking the classifier chain in tc_ctl_tfilter() for
checking for existing tp instances we had a priority match and found
the tp instance that was created and linked by thread B. Now calling
again into tp->ops->change() with that tp was successful and returned
without error. tp_created was never cleared in the second round, thus
kernel thinks that we need to link it into the classifier chain (once
again). tp and *back point to the same object due to the match we had
earlier on. Thus for thread B's already public tp, we reset tp->next
to tp itself and link it into the chain, which eventually causes the
mentioned endless loop in tc_classify() once a packet hits the data
path.

Fix is to clear tp_created at the beginning of each request, also when
we replay it. On the paths that can cause -EAGAIN we already destroy
the original tp instance we had and on replay we really need to start
from scratch. It seems that this issue was first introduced in commit
12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining
and avoid kernel panic when we use cls_cgroup").

Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup")
Reported-by: Shahar Klein
Signed-off-by: Daniel Borkmann
Cc: Cong Wang
Acked-by: Eric Dumazet
Tested-by: Shahar Klein
Signed-off-by: David S.
Miller --- net/sched/cls_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3fbba79a4ef0..1ecdf809b5fa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; From e6afb1ad88feddf2347ea779cfaf4d03d3cd40b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 Dec 2016 19:56:56 -0800 Subject: [PATCH 07/57] net: korina: Fix NAPI versus resources freeing Commit beb0babfb77e ("korina: disable napi on close and restart") introduced calls to napi_disable() that were missing before, unfortunately this leaves a small window during which NAPI has a chance to run, yet we just freed resources since korina_free_ring() has been called: Fix this by disabling NAPI first then freeing resource, and make sure that we also cancel the restart task before doing the resource freeing. Fixes: beb0babfb77e ("korina: disable napi on close and restart") Reported-by: Alexandros C. Couloumbis Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index cbeea915f026..8037426ec50f 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work) DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); + korina_free_ring(dev); + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); return; @@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev) tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; writel(tmp, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); cancel_work_sync(&lp->restart_task); + korina_free_ring(dev); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); From b4b8664d291ac1998e0f0bcdc96b6397f0fe68b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Dec 2016 04:10:19 -0500 Subject: [PATCH 08/57] arm64: don't pull uaccess.h into *.S Split asm-only parts of arm64 uaccess.h into a new header and use that from *.S. Signed-off-by: Al Viro --- arch/arm64/include/asm/asm-uaccess.h | 65 ++++++++++++++++++++++++++++ arch/arm64/include/asm/uaccess.h | 64 --------------------------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- 9 files changed, 72 insertions(+), 71 deletions(-) create mode 100644 arch/arm64/include/asm/asm-uaccess.h diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h new file mode 100644 index 000000000000..df411f3e083c --- /dev/null +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -0,0 +1,65 @@ +#ifndef __ASM_ASM_UACCESS_H +#define __ASM_ASM_UACCESS_H + +#include +#include +#include +#include + +/* + * User access enabling/disabling macros. 
+ */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + .macro __uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro __uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_disable, tmp1 +alternative_if_not ARM64_HAS_PAN + __uaccess_ttbr0_disable \tmp1 +alternative_else_nop_endif + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + __uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else_nop_endif + .endm +#else + .macro uaccess_ttbr0_disable, tmp1 + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 + .endm +#endif + +/* + * These macros are no-ops when UAO is present. + */ + .macro uaccess_disable_not_uao, tmp1 + uaccess_ttbr0_disable \tmp1 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(1) +alternative_else_nop_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 + uaccess_ttbr0_enable \tmp1, \tmp2 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(0) +alternative_else_nop_endif + .endm + +#endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index d26750ca6e06..46da3ea638bb 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -22,8 +22,6 @@ #include #include -#ifndef __ASSEMBLY__ - /* * User space memory access functions */ @@ -424,66 +422,4 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#else /* __ASSEMBLY__ */ - -#include - -/* - * User access enabling/disabling macros. - */ -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - .macro __uaccess_ttbr0_disable, tmp1 - mrs \tmp1, ttbr1_el1 // swapper_pg_dir - add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir - msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb - .endm - - .macro __uaccess_ttbr0_enable, tmp1 - get_thread_info \tmp1 - ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 - msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 - isb - .endm - - .macro uaccess_ttbr0_disable, tmp1 -alternative_if_not ARM64_HAS_PAN - __uaccess_ttbr0_disable \tmp1 -alternative_else_nop_endif - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 -alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 -alternative_else_nop_endif - .endm -#else - .macro uaccess_ttbr0_disable, tmp1 - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 - .endm -#endif - -/* - * These macros are no-ops when UAO is present. 
- */ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(1) -alternative_else_nop_endif - .endm - - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(0) -alternative_else_nop_endif - .endm - -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7504f40d7ee..923841ffe4a9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index add4a1334085..e88fb99c1561 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,7 +17,7 @@ */ #include -#include +#include .text diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index fd6cd05593f9..4b5d826895ff 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy from user space to a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index d828540ded6f..47184c3a97da 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -19,7 +19,7 @@ #include #include -#include +#include /* * Copy from user space to user space (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 3e6ae2663b82..351f0766f7a6 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy to user space from a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 17f422a4dc55..83c27b6e6dca 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -23,7 +23,7 @@ #include #include #include -#include +#include /* * flush_icache_range(start,end) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 47cf3f9d89ff..947830a459d2 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,7 +49,7 @@ #include #include -#include +#include #include From b9d9d6911bd5c370ad4b3aa57d758c093d17aed5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Dec 2016 22:58:19 +0100 Subject: [PATCH 09/57] smp/hotplug: Undo tglxs brainfart The attempt to prevent overwriting an active state resulted in a disaster which effectively disables all dynamically allocated hotplug states. Cleanup the mess. 
Fixes: dc280d936239 ("cpu/hotplug: Prevent overwriting of callbacks") Reported-by: Markus Trippelsdorf Reported-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/cpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 042fd7e8e030..f75c4d031eeb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1471,6 +1471,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, bool multi_instance) { int cpu, ret = 0; + bool dynstate; if (cpuhp_cb_check(state) || !name) return -EINVAL; @@ -1480,6 +1481,12 @@ int __cpuhp_setup_state(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); + dynstate = state == CPUHP_AP_ONLINE_DYN; + if (ret > 0 && dynstate) { + state = ret; + ret = 0; + } + if (ret || !invoke || !startup) goto out; @@ -1508,7 +1515,7 @@ out: * If the requested state is CPUHP_AP_ONLINE_DYN, return the * dynamically allocated state in case of success. */ - if (!ret && state == CPUHP_AP_ONLINE_DYN) + if (!ret && dynstate) return state; return ret; } From 0dad3a3014a0b9e72521ff44f17e0054f43dcdea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Dec 2016 22:58:20 +0100 Subject: [PATCH 10/57] x86/mce/AMD: Make the init code more robust If mce_device_init() fails then the mce device pointer is NULL and the AMD mce code happily dereferences it. Add a sanity check. Reported-by: Markus Trippelsdorf Reported-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ffacfdcacb85..a5fd137417a2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1182,6 +1182,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = get_name(bank, NULL); int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); From e568df6b84ff05a22467503afc11bee7a6ba0700 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 16:42:53 +0200 Subject: [PATCH 11/57] ext2: Return BH_New buffers for zeroed blocks So far we did not return BH_New buffers from ext2_get_blocks() when we allocated and zeroed-out a block for DAX inode to avoid racy zeroing in DAX code. This zeroing is gone these days so we can remove the workaround. Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext2/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0093ea2512a8..f073bfca694b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode, mutex_unlock(&ei->truncate_mutex); goto cleanup; } - } else { - *new = true; } + *new = true; ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); From c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:22:44 +0200 Subject: [PATCH 12/57] mm: Invalidate DAX radix tree entries only if appropriate Currently invalidate_inode_pages2_range() and invalidate_mapping_pages() just delete all exceptional radix tree entries they find. 
For DAX this is not desirable as we track cache dirtiness in these entries and when they are evicted, we may not flush caches although it is necessary. This can for example manifest when we write to the same block both via mmap and via write(2) (to different offsets) and fsync(2) then does not properly flush CPU caches when modification via write(2) was the last one. Create appropriate DAX functions to handle invalidation of DAX entries for invalidate_inode_pages2_range() and invalidate_mapping_pages() and wire them up into the corresponding mm functions. Acked-by: Johannes Weiner Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 75 +++++++++++++++++++++++++++++++++++++-------- include/linux/dax.h | 3 ++ mm/truncate.c | 75 ++++++++++++++++++++++++++++++++++++--------- 3 files changed, 127 insertions(+), 26 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a8732fbed381..bcfedd184860 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); } +static int __dax_invalidate_mapping_entry(struct address_space *mapping, + pgoff_t index, bool trunc) +{ + int ret = 0; + void *entry; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = get_unlocked_mapping_entry(mapping, index, NULL); + if (!entry || !radix_tree_exceptional_entry(entry)) + goto out; + if (!trunc && + (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) + goto out; + radix_tree_delete(page_tree, index); + mapping->nrexceptional--; + ret = 1; +out: + put_unlocked_mapping_entry(mapping, index, entry); + spin_unlock_irq(&mapping->tree_lock); + return ret; +} /* * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree * entry to get unlocked before deleting it. */ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { - void *entry; + int ret = __dax_invalidate_mapping_entry(mapping, index, true); - spin_lock_irq(&mapping->tree_lock); - entry = get_unlocked_mapping_entry(mapping, index, NULL); /* * This gets called from truncate / punch_hole path. As such, the caller * must hold locks protecting against concurrent modifications of the @@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * caller has seen exceptional entry for this index, we better find it * at that index as well... */ - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) { - spin_unlock_irq(&mapping->tree_lock); - return 0; - } - radix_tree_delete(&mapping->page_tree, index); - mapping->nrexceptional--; - spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, entry, true); + WARN_ON_ONCE(!ret); + return ret; +} - return 1; +/* + * Invalidate exceptional DAX entry if easily possible. This handles DAX + * entries for invalidate_inode_pages() so we evict the entry only if we can + * do so without blocking. 
+ */ +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) +{ + int ret = 0; + void *entry, **slot; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = __radix_tree_lookup(page_tree, index, NULL, &slot); + if (!entry || !radix_tree_exceptional_entry(entry) || + slot_locked(mapping, slot)) + goto out; + if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) + goto out; + radix_tree_delete(page_tree, index); + mapping->nrexceptional--; + ret = 1; +out: + spin_unlock_irq(&mapping->tree_lock); + if (ret) + dax_wake_mapping_entry_waiter(mapping, index, entry, true); + return ret; +} + +/* + * Invalidate exceptional DAX entry if it is clean. + */ +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index) +{ + return __dax_invalidate_mapping_entry(mapping, index, false); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index f97bcfe79472..24ad71173995 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); diff --git a/mm/truncate.c b/mm/truncate.c index fd97f1dbce29..dd7b24e083c5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -24,20 +24,12 @@ #include #include "internal.h" -static void clear_exceptional_entry(struct address_space *mapping, - pgoff_t index, void *entry) +static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, + void *entry) { struct radix_tree_node *node; void **slot; - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) - return; - - if (dax_mapping(mapping)) { - dax_delete_mapping_entry(mapping, index); - return; - } spin_lock_irq(&mapping->tree_lock); /* * Regular page slots are stabilized by the page lock even @@ -55,6 +47,56 @@ unlock: spin_unlock_irq(&mapping->tree_lock); } +/* + * Unconditionally remove exceptional entry. Usually called from truncate path. + */ +static void truncate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + if (dax_mapping(mapping)) { + dax_delete_mapping_entry(mapping, index); + return; + } + clear_shadow_entry(mapping, index, entry); +} + +/* + * Invalidate exceptional entry if easily possible. This handles exceptional + * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and + * clean entries. + */ +static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + +/* + * Invalidate exceptional entry if clean. This handles exceptional entries for + * invalidate_inode_pages2() so for DAX it evicts only clean entries. 
+ */ +static int invalidate_exceptional_entry2(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry_sync(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + /** * do_invalidatepage - invalidate part or all of a page * @page: the page which is affected @@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, } if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + invalidate_exceptional_entry(mapping, index, + page); continue; } @@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + if (!invalidate_exceptional_entry2(mapping, + index, page)) + ret = -EBUSY; continue; } From e3fce68cdbed297d927e993b3ea7b8b1cee545da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:10:28 +0200 Subject: [PATCH 13/57] dax: Avoid page invalidation races and unnecessary radix tree traversals Currently dax_iomap_rw() takes care of invalidating page tables and evicting hole pages from the radix tree when write(2) to the file happens. This invalidation is only necessary when there is some block allocation resulting from write(2). Furthermore in current place the invalidation is racy wrt page fault instantiating a hole page just after we have invalidated it. So perform the page invalidation inside dax_iomap_actor() where we can do it only when really necessary and after blocks have been allocated so nobody will be instantiating new hole pages anymore. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bcfedd184860..08e15db28b79 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -985,6 +985,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; + /* + * Write can allocate block for an area which has a hole page mapped + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. + */ + if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); + } + while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); struct blk_dax_ctl dax = { 0 }; @@ -1043,23 +1054,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. 
It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. We'll eventually need to shift this down even further so that - * we can check if we allocated blocks over a hole first. - */ - if (mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, - (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); From f449b936f1aff7696b24a338f493d5cee8d48d55 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Oct 2016 14:48:38 +0200 Subject: [PATCH 14/57] dax: Finish fault completely when loading holes The only case when we do not finish the page fault completely is when we are loading hole pages into a radix tree. Avoid this special case and finish the fault in that case as well inside the DAX fault handler. It will allow us for easier iomap handling. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 08e15db28b79..bfec6f2ef613 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -539,15 +539,16 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, * otherwise it will simply fall out of the page cache under memory * pressure without ever having been dirtied. */ -static int dax_load_hole(struct address_space *mapping, void *entry, +static int dax_load_hole(struct address_space *mapping, void **entry, struct vm_fault *vmf) { struct page *page; + int ret; /* Hole page already exists? Return it... */ - if (!radix_tree_exceptional_entry(entry)) { - vmf->page = entry; - return VM_FAULT_LOCKED; + if (!radix_tree_exceptional_entry(*entry)) { + page = *entry; + goto out; } /* This will replace locked radix tree entry with a hole page */ @@ -555,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry, vmf->gfp_mask | __GFP_ZERO); if (!page) return VM_FAULT_OOM; + out: vmf->page = page; - return VM_FAULT_LOCKED; + ret = finish_fault(vmf); + vmf->page = NULL; + *entry = page; + if (!ret) { + /* Grab reference for PTE that is now referencing the page */ + get_page(page); + return VM_FAULT_NOPAGE; + } + return ret; } static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, @@ -1163,8 +1173,8 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { - vmf_ret = dax_load_hole(mapping, entry, vmf); - break; + vmf_ret = dax_load_hole(mapping, &entry, vmf); + goto finish_iomap; } /*FALLTHRU*/ default: @@ -1185,8 +1195,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } } unlock_entry: - if (vmf_ret != VM_FAULT_LOCKED || error) - put_locked_mapping_entry(mapping, vmf->pgoff, entry); + put_locked_mapping_entry(mapping, vmf->pgoff, entry); out: if (error == -ENOMEM) return VM_FAULT_OOM | major; From 9f141d6ef6258a3a37a045842d9ba7e68f368956 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Oct 2016 14:34:31 +0200 Subject: [PATCH 15/57] dax: Call ->iomap_begin without entry lock during dax fault Currently ->iomap_begin() handler is called with entry lock held. 
If the filesystem held any locks between ->iomap_begin() and ->iomap_end() (such as ext4 which will want to hold transaction open), this would cause lock inversion with the iomap_apply() from standard IO path which first calls ->iomap_begin() and only then calls ->actor() callback which grabs entry locks for DAX (if it faults when copying from/to user provided buffers). Fix the problem by nesting grabbing of entry lock inside ->iomap_begin() - ->iomap_end() pair. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 123 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 56 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bfec6f2ef613..5c74f60d0a50 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1078,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(dax_iomap_rw); +static int dax_fault_return(int error) +{ + if (error == 0) + return VM_FAULT_NOPAGE; + if (error == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} + /** * dax_iomap_fault - handle a page fault on a DAX file * @vma: The virtual memory area where the fault occurred @@ -1110,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (pos >= i_size_read(inode)) return VM_FAULT_SIGBUS; - entry = grab_mapping_entry(mapping, vmf->pgoff, 0); - if (IS_ERR(entry)) { - error = PTR_ERR(entry); - goto out; - } - if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; @@ -1126,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); if (error) - goto unlock_entry; + return dax_fault_return(error); if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { - error = -EIO; /* fs corruption? */ + vmf_ret = dax_fault_return(-EIO); /* fs corruption? 
*/ + goto finish_iomap; + } + + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); + if (IS_ERR(entry)) { + vmf_ret = dax_fault_return(PTR_ERR(entry)); goto finish_iomap; } @@ -1151,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } if (error) - goto finish_iomap; + goto error_unlock_entry; __SetPageUptodate(vmf->cow_page); vmf_ret = finish_fault(vmf); if (!vmf_ret) vmf_ret = VM_FAULT_DONE_COW; - goto finish_iomap; + goto unlock_entry; } switch (iomap.type) { @@ -1169,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE, &entry, vma, vmf); + /* -EBUSY is fine, somebody else faulted on the same PTE */ + if (error == -EBUSY) + error = 0; break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { vmf_ret = dax_load_hole(mapping, &entry, vmf); - goto finish_iomap; + goto unlock_entry; } /*FALLTHRU*/ default: @@ -1183,30 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, break; } - finish_iomap: - if (ops->iomap_end) { - if (error || (vmf_ret & VM_FAULT_ERROR)) { - /* keep previous error */ - ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PAGE_SIZE, - PAGE_SIZE, flags, &iomap); - } - } + error_unlock_entry: + vmf_ret = dax_fault_return(error) | major; unlock_entry: put_locked_mapping_entry(mapping, vmf->pgoff, entry); - out: - if (error == -ENOMEM) - return VM_FAULT_OOM | major; - /* -EBUSY is fine, somebody else faulted on the same PTE */ - if (error < 0 && error != -EBUSY) - return VM_FAULT_SIGBUS | major; - if (vmf_ret) { - WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */ - return vmf_ret; + finish_iomap: + if (ops->iomap_end) { + int copied = PAGE_SIZE; + + if (vmf_ret & VM_FAULT_ERROR) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PTE we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } - return VM_FAULT_NOPAGE | major; + return vmf_ret; } EXPORT_SYMBOL_GPL(dax_iomap_fault); @@ -1330,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((pgoff | PG_PMD_COLOUR) > max_pgoff) goto fallback; - /* - * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX - * PMD or a HZP entry. If it can't (because a 4k page is already in - * the tree, for instance), it will return -EEXIST and we just fall - * back to 4k entries. - */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); - if (IS_ERR(entry)) - goto fallback; - /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way @@ -1348,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pos = (loff_t)pgoff << PAGE_SHIFT; error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); if (error) - goto unlock_entry; + goto fallback; + if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. 
+ */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto finish_iomap; + vmf.pgoff = pgoff; vmf.flags = flags; vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; @@ -1364,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) - goto finish_iomap; + goto unlock_entry; result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, &entry); break; @@ -1373,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, break; } - finish_iomap: - if (ops->iomap_end) { - if (result == VM_FAULT_FALLBACK) { - ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE, - iomap_flags, &iomap); - if (error) - result = VM_FAULT_FALLBACK; - } - } unlock_entry: put_locked_mapping_entry(mapping, pgoff, entry); + finish_iomap: + if (ops->iomap_end) { + int copied = PMD_SIZE; + + if (result == VM_FAULT_FALLBACK) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PMD we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, + &iomap); + } fallback: if (result == VM_FAULT_FALLBACK) { split_huge_pmd(vma, pmd, address); From 1db175428ee374489448361213e9c3b749d14900 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Oct 2016 11:33:49 +0200 Subject: [PATCH 16/57] ext4: Simplify DAX fault path Now that dax_iomap_fault() calls ->iomap_begin() without entry lock, we can use transaction starting in ext4_iomap_begin() and thus simplify ext4_dax_fault(). It also provides us proper retries in case of ENOSPC. 
Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext4/file.c | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b5f184493c57..d663d3d7c81c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -258,7 +258,6 @@ out: static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - EXT4_DATA_TRANS_BLOCKS(sb)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else - result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + } + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = flags & FAULT_FLAG_WRITE; @@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - ext4_chunk_trans_blocks(inode, - PMD_SIZE / PAGE_SIZE)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else { - result = dax_iomap_pmd_fault(vma, addr, pmd, flags, - &ext4_iomap_ops); } - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_pmd_fault(vma, addr, pmd, flags, + &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } From 02608e02fbec04fccf2eb0cc8d8082f65c0a4286 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Dec 2016 12:32:54 -0800 Subject: [PATCH 17/57] crypto: testmgr - Use heap buffer for acomp test input Christopher Covington reported a crash on aarch64 on recent Fedora kernels: kernel BUG at ./include/linux/scatterlist.h:140! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 752 Comm: cryptomgr_test Not tainted 4.9.0-11815-ge93b1cc #162 Hardware name: linux,dummy-virt (DT) task: ffff80007c650080 task.stack: ffff800008910000 PC is at sg_init_one+0xa0/0xb8 LR is at sg_init_one+0x24/0xb8 ... [] sg_init_one+0xa0/0xb8 [] test_acomp+0x10c/0x438 [] alg_test_comp+0xb0/0x118 [] alg_test+0x17c/0x2f0 [] cryptomgr_test+0x44/0x50 [] kthread+0xf8/0x128 [] ret_from_fork+0x10/0x50 The test vectors used for input are part of the kernel image. These inputs are passed as a buffer to sg_init_one which eventually blows up with BUG_ON(!virt_addr_valid(buf)). 
On arm64, virt_addr_valid returns false for the kernel image since virt_to_page will not return the correct page. Fix this by copying the input vectors to heap buffer before setting up the scatterlist. Reported-by: Christopher Covington Fixes: d7db7a882deb ("crypto: acomp - update testmgr with support for acomp") Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu --- crypto/testmgr.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f616ad74cce7..44e888b0b041 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1461,16 +1461,25 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; + void *input_vec; + input_vec = kmalloc(ilen, GFP_KERNEL); + if (!input_vec) { + ret = -ENOMEM; + goto out; + } + + memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); - sg_init_one(&src, ctemplate[i].input, ilen); + sg_init_one(&src, input_vec, ilen); sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { pr_err("alg: acomp: request alloc failed for %s\n", algo); + kfree(input_vec); ret = -ENOMEM; goto out; } @@ -1483,6 +1492,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1491,6 +1501,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", i + 1, algo, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1500,26 +1511,37 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, i + 1, algo); hexdump(output, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } + kfree(input_vec); acomp_request_free(req); } for (i = 0; i < dtcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = dtemplate[i].inlen; + void *input_vec; + input_vec = kmalloc(ilen, GFP_KERNEL); + if (!input_vec) { + ret = -ENOMEM; + goto out; + } + + memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); - sg_init_one(&src, dtemplate[i].input, ilen); + sg_init_one(&src, input_vec, ilen); sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { pr_err("alg: acomp: request alloc failed for %s\n", algo); + kfree(input_vec); ret = -ENOMEM; goto out; } @@ -1532,6 +1554,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, if (ret) { pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1540,6 +1563,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n", i + 1, algo, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1549,10 +1573,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, i + 1, algo); hexdump(output, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } + kfree(input_vec); acomp_request_free(req); } From 56ab6b93007e5000a8812985aec1833c4a6a9ce0 Mon Sep 17 
00:00:00 2001 From: Haishuang Yan Date: Sun, 25 Dec 2016 14:33:16 +0800 Subject: [PATCH 18/57] ipv4: Namespaceify tcp_tw_reuse knob Different namespaces might have different requirements to reuse TIME-WAIT sockets for new connections. This might be required in cases where different namespace applications are in place which require TIME_WAIT socket connections to be reduced independently of the host. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1b777e..0378e88f6fd3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b4c6b2..6061963cca98 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..22cbd61079b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -432,13 +432,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &tcp_adv_win_scale_min, .extra2 = &tcp_adv_win_scale_max, }, - { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_frto", .data = &sysctl_tcp_frto, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f533ada..fe9da4fb96bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include #include -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: From df30f7408b187929dbde72661c7f7c615268f1d0 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 26 Dec 2016 08:31:27 -0800 Subject: [PATCH 19/57] openvswitch: upcall: Fix vlan handling. Networking stack accelerate vlan tag handling by keeping topmost vlan header in skb. 
This works as long as packet remains in OVS datapath. But during OVS upcall vlan header is pushed on to the packet. When such packet is sent back to OVS datapath, core networking stack might not handle it correctly. Following patch avoids this issue by accelerating the vlan tag during flow key extract. This simplifies datapath by bringing uniform packet processing for packets from all code paths. Fixes: 5108bbaddc ("openvswitch: add processing of L3 packets"). CC: Jarno Rajahalme CC: Jiri Benc Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 1 - net/openvswitch/flow.c | 54 +++++++++++++++++++------------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2d4c4d3911c0..9c62b6325f7a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; - packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 08aa926cd5cf..2c0a00f7f1b7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb) * Returns 0 if it encounters a non-vlan or incomplete packet. * Returns 1 after successfully parsing vlan tag. */ -static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, + bool untag_vlan) { struct vlan_head *vh = (struct vlan_head *)skb->data; @@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); key_vh->tpid = vh->tpid; - __skb_pull(skb, sizeof(struct vlan_head)); + if (unlikely(untag_vlan)) { + int offset = skb->data - skb_mac_header(skb); + u16 tci; + int err; + + __skb_push(skb, offset); + err = __skb_vlan_pop(skb, &tci); + __skb_pull(skb, offset); + if (err) + return err; + __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci); + } else { + __skb_pull(skb, sizeof(struct vlan_head)); + } return 1; } @@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) key->eth.vlan.tpid = skb->vlan_proto; } else { /* Parse outer vlan tag in the non-accelerated case. */ - res = parse_vlan_tag(skb, &key->eth.vlan); + res = parse_vlan_tag(skb, &key->eth.vlan, true); if (res <= 0) return res; } /* Parse inner vlan tag. */ - res = parse_vlan_tag(skb, &key->eth.cvlan); + res = parse_vlan_tag(skb, &key->eth.cvlan, false); if (res <= 0) return res; @@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { - /* key_extract assumes that skb->protocol is set-up for - * layer 3 packets which is the case for other callers, - * in particular packets recieved from the network stack. - * Here the correct value can be set from the metadata - * extracted above. - */ - skb->protocol = key->eth.type; - } else { - struct ethhdr *eth; - - skb_reset_mac_header(skb); - eth = eth_hdr(skb); - - /* Normally, setting the skb 'protocol' field would be - * handled by a call to eth_type_trans(), but it assumes - * there's a sending device, which we may not have. 
- */ - if (eth_proto_is_802_3(eth->h_proto)) - skb->protocol = eth->h_proto; - else - skb->protocol = htons(ETH_P_802_2); - } + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets received from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + * For L2 packet key eth type would be zero. skb protocol + * would be set to correct value later during key-extact. + */ + skb->protocol = key->eth.type; return key_extract(skb, key); } From be26727772cd86979255dfaf1eea967338dc0c9b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Dec 2016 10:49:54 +0800 Subject: [PATCH 20/57] net: xdp: remove unused bfp_warn_invalid_xdp_buffer() After commit 73b62bd085f4737679ea9afc7867fa5f99ba7d1b ("virtio-net: remove the warning before XDP linearizing"), there's no users for bpf_warn_invalid_xdp_buffer(), so remove it. This is a revert for commit f23bc46c30ca5ef58b8549434899fcbac41b2cfc. Cc: Daniel Borkmann Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 1 - net/core/filter.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 702314253797..a0934e6c9bab 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_buffer(void); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/net/core/filter.c b/net/core/filter.c index 7190bd648154..b1461708a977 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -void bpf_warn_invalid_xdp_buffer(void) -{ - WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); -} -EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); - static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, From 610c908773d30907c950ca3b2ee8ac4b2813537b Mon Sep 17 00:00:00 2001 From: Chun-Hao Lin Date: Tue, 27 Dec 2016 16:29:43 +0800 Subject: [PATCH 21/57] r8169: add support for RTL8168 series add-on card. This chip is the same as RTL8168, but its device id is 0x8161. Signed-off-by: Chun-Hao Lin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index f9b97f5946f8..44389c90056a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -326,6 +326,7 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, From 5799fc905930f866c7d32aaf81b31f8027297506 Mon Sep 17 00:00:00 2001 From: "Kweh, Hock Leong" Date: Wed, 28 Dec 2016 04:07:41 +0800 Subject: [PATCH 22/57] net: stmmac: fix incorrect bit set in gmac4 mdio addr register Fixing the gmac4 mdio write access to use MII_GMAC4_WRITE only instead of OR together with MII_WRITE. Signed-off-by: Kweh, Hock Leong Acked-By: Joao Pinto Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index fda01f770eff..b0344c213752 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 value = MII_WRITE | MII_BUSY; + u32 value = MII_BUSY; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, & priv->hw->mii.clk_csr_mask; if (priv->plat->has_gmac4) value |= MII_GMAC4_WRITE; + else + value |= MII_WRITE; /* Wait until any existing MII operation is complete */ if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) From 66115335fbb411365c23349b2fbe7e041eabbaf2 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Fri, 23 Dec 2016 00:53:10 +0000 Subject: [PATCH 23/57] docs: Fix build failure The 80211.tmpl DocBook file was removed in commit 819bf593767c ("docs-rst: sphinxify 802.11 documentation"), but the 80211.xml target was re-added to the Makefile by commit 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document"), leading to a failure when building the documentation: *** No rule to make target 'Documentation/DocBook/80211.xml', needed by 'Documentation/DocBook/80211.aux.xml'. 
cc: stable@vger.kernel.org Signed-off-by: John Brooks Mea-culpa-by: Jonathan Corbet Signed-off-by: Jonathan Corbet --- Documentation/DocBook/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index c75e5d6b8fa8..a6eb7dcd4dd5 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml \ kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - 80211.xml sh.xml regulator.xml w1.xml \ + sh.xml regulator.xml w1.xml \ writing_musb_glue_layer.xml iio.xml ifeq ($(DOCBOOKS),) From 36f671be1db1b17d3d4ab0c8b47f81ccb1efcb75 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Sat, 17 Dec 2016 19:42:17 +0200 Subject: [PATCH 24/57] Documentation/unaligned-memory-access.txt: fix incorrect comparison operator In the actual implementation ether_addr_equal function tests for equality to 0 when returning. It seems in commit 0d74c4 it is somehow overlooked to change this operator to reflect the actual function. Signed-off-by: Cihangir Akturk Signed-off-by: Jonathan Corbet --- Documentation/unaligned-memory-access.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt index a445da098bc6..3f76c0c37920 100644 --- a/Documentation/unaligned-memory-access.txt +++ b/Documentation/unaligned-memory-access.txt @@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2) #else const u16 *a = (const u16 *)addr1; const u16 *b = (const u16 *)addr2; - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } From b91e1302ad9b80c174a4855533f7e3aa2873355e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 Dec 2016 11:40:38 -0800 Subject: [PATCH 25/57] mm: optimize PageWaiters bit use for unlock_page() In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. 
And architectures with LL/SC (which is all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit. Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hickup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad. Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin Cc: Dave Hansen Cc: Bob Peterson Cc: Steven Whitehouse Cc: Andrew Lutomirski Cc: Andreas Gruenbacher Cc: Peter Zijlstra Cc: Mel Gorman Signed-off-by: Linus Torvalds --- arch/x86/include/asm/bitops.h | 13 +++++++++++++ include/linux/page-flags.h | 2 +- mm/filemap.c | 36 ++++++++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 68557f52b961..854022772c5b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } +static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1\n\t" + CC_SET(s) + : CC_OUT(s) (negative), ADDR + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; +} + +// Let everybody know we have it +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte + /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c56b39890a41..6b5818d6de32 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -73,13 +73,13 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ - PG_waiters, /* Page has waiters, check its waitqueue */ PG_error, PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_active, + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_slab, PG_owner_priv_1, /* Owner use. 
If pagecache, fs may use*/ PG_arch_1, diff --git a/mm/filemap.c b/mm/filemap.c index 82f26cde830c..6b1d96f86a9c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_lock. + * + * On x86 (and on many other architectures), we can clear PG_lock and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unneccssary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); From 98473f9f3f9bd404873cd1178c8be7d6d619f0d1 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 29 Dec 2016 14:16:07 -0800 Subject: [PATCH 26/57] mm/filemap: fix parameters to test_bit() mm/filemap.c: In function 'clear_bit_unlock_is_negative_byte': mm/filemap.c:933:9: error: too few arguments to function 'test_bit' return test_bit(PG_waiters); ^~~~~~~~ Fixes: b91e1302ad9b ('mm: optimize PageWaiters bit use for unlock_page()') Signed-off-by: Olof Johansson Brown-paper-bag-by: Linus Torvalds Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6b1d96f86a9c..d0e4d1002059 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -930,7 +930,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem { clear_bit_unlock(nr, mem); /* smp_mb__after_atomic(); */ - return test_bit(PG_waiters); + return test_bit(PG_waiters, mem); } #endif From 0c744ea4f77d72b3dcebb7a8f2684633ec79be88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Jan 2017 14:31:53 -0800 Subject: [PATCH 27/57] Linux 4.10-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ec411ba9e40f..5470d599384a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Roaring Lionus # 
*DOCUMENTATION* From 00a19f3e25c0c40e0ec77f52d4841d23ad269169 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 8 Nov 2016 09:21:19 +0100 Subject: [PATCH 28/57] ARM: 8627/1: avoid cache flushing in flush_dcache_page() When the data cache is PIPT or VIPT non-aliasing, and cache operations are broadcast by the hardware, we can always postpone the flush in flush_dcache_page(). A similar change was done for ARM64 in commit b5b6c9e9149d ("arm64: Avoid cache flushing in flush_dcache_page()"). Reviewed-by: Catalin Marinas Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mm/flush.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 3cced8455727..f1e6190aa7ea 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -327,6 +327,12 @@ void flush_dcache_page(struct page *page) if (page == ZERO_PAGE(0)) return; + if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) { + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); + return; + } + mapping = page_mapping(page); if (!cache_ops_need_broadcast() && From 79964a1c2972ca7ecc231e2d2ac7593a1af73f63 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 12 Dec 2016 09:31:18 +0100 Subject: [PATCH 29/57] ARM: 8633/1: nommu: allow mmap when !CONFIG_MMU commit ab6494f0c96f ("nommu: Add noMMU support to the DMA API") added a CONFIG_MMU compilation guard, but that prohibits the use of dma_mmap_wc() when the platform doesn't have an MMU. This patch calls vm_iomap_memory() in the noMMU case to check that the addresses are correct and to set vma->vm_flags, rather than always returning an error. Signed-off-by: Benjamin Gaignard Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab7710002ba6..afa64ed5e36b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -868,6 +868,9 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_end - vma->vm_start, vma->vm_page_prot); } +#else + ret = vm_iomap_memory(vma, vma->vm_start, + (vma->vm_end - vma->vm_start)); #endif /* CONFIG_MMU */ return ret; From 8a792e9afbce84a0fdaf213fe42bb97382487094 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sat, 7 Jan 2017 17:48:10 +0100 Subject: [PATCH 30/57] ARM: 8635/1: nommu: allow enabling REMAP_VECTORS_TO_RAM REMAP_VECTORS_TO_RAM depends on DRAM_BASE, but since DRAM_BASE is a hex symbol, REMAP_VECTORS_TO_RAM could never get enabled. Depending on DRAM_BASE is also redundant: whenever REMAP_VECTORS_TO_RAM becomes visible to Kconfig, DRAM_BASE is available as well, since this Kconfig file is only sourced for !MMU. Signed-off-by: Afzal Mohammed Reviewed-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig-nommu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index aed66d5df7f1..b7576349528c 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -34,8 +34,7 @@ config PROCESSOR_ID used instead of the auto-probing which utilizes the register. config REMAP_VECTORS_TO_RAM - bool 'Install vectors to the beginning of RAM' if DRAM_BASE - depends on DRAM_BASE + bool 'Install vectors to the beginning of RAM' help The kernel needs to change the hardware exception vectors.
In nommu mode, the hardware exception vectors are normally From fa5b6ec9e5274aeae2326e25995506a953e5f878 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:40 -0800 Subject: [PATCH 31/57] lib/Kconfig.debug: Add ARCH_HAS_DEBUG_VIRTUAL DEBUG_VIRTUAL currently depends on DEBUG_KERNEL && X86. arm64 is getting the same support. Rather than add a list of architectures, switch this to ARCH_HAS_DEBUG_VIRTUAL and let architectures select it as appropriate. Acked-by: Ingo Molnar Reviewed-by: Mark Rutland Tested-by: Mark Rutland Suggested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/x86/Kconfig | 1 + lib/Kconfig.debug | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..f1d4e8f2131f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select ARCH_CLOCKSOURCE_DATA select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b06848a104e6..2aed31608b86 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -622,9 +622,12 @@ config DEBUG_VM_PGFLAGS If unsure, say N. +config ARCH_HAS_DEBUG_VIRTUAL + bool + config DEBUG_VIRTUAL bool "Debug VM translations" - depends on DEBUG_KERNEL && X86 + depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL help Enable some costly sanity checks in virtual to page code. This can catch mistakes with virt_to_page() and friends. From 2dece445b6dbdaa3d94f38ef44aa1b63bc2a6bb9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:41 -0800 Subject: [PATCH 32/57] mm/cma: Cleanup highmem check 6b101e2a3ce4 ("mm/CMA: fix boot regression due to physical address of high_memory") added checks to use __pa_nodebug on x86 since CONFIG_DEBUG_VIRTUAL complains about high_memory not being linearlly mapped. arm64 is now getting support for CONFIG_DEBUG_VIRTUAL as well. Rather than add an explosion of arches to the #ifdef, switch to an alternate method to calculate the physical start of highmem using the page before highmem starts. This avoids the need for the #ifdef and extra __pa_nodebug calls. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/cma.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index c960459eda7e..94b3460cd608 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -235,18 +235,13 @@ int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t highmem_start; int ret = 0; -#ifdef CONFIG_X86 /* - * high_memory isn't direct mapped memory so retrieving its physical - * address isn't appropriate. But it would be useful to check the - * physical address of the highmem boundary so it's justifiable to get - * the physical address from it. On x86 there is a validation check for - * this case, so the following workaround is needed to avoid it. + * We can't use __pa(high_memory) directly, since high_memory + * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) + * complain. Find the boundary by adding one to the last valid + * address. 
*/ - highmem_start = __pa_nodebug(high_memory); -#else - highmem_start = __pa(high_memory); -#endif + highmem_start = __pa(high_memory - 1) + 1; pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); From 568c5fe5a54f2654f5a4c599c45b8a62ed9a2013 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:42 -0800 Subject: [PATCH 33/57] mm: Introduce lm_alias Certain architectures may have the kernel image mapped separately to alias the linear map. Introduce a macro lm_alias to translate a kernel image symbol into its linear alias. This is used in part with work to add CONFIG_DEBUG_VIRTUAL support for arm64. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index fe6b4036664a..5dc9c4650522 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif +#ifndef lm_alias +#define lm_alias(x) __va(__pa_symbol(x)) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. From b6e92aa81038ce57d298a87b4c44285a1d456c3e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:43 -0800 Subject: [PATCH 34/57] kexec: Switch to __pa_symbol __pa_symbol is the correct api to get the physical address of kernel symbols. Switch to it to allow for better debug checking. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: "Eric W. Biederman" Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- kernel/kexec_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5617cc412444..a01974e1bf6b 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1399,7 +1399,7 @@ void __weak arch_crash_save_vmcoreinfo(void) phys_addr_t __weak paddr_vmcoreinfo_note(void) { - return __pa((unsigned long)(char *)&vmcoreinfo_note); + return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note); } static int __init crash_save_vmcoreinfo_init(void) From 5c6a84a3f4558a6115fef1b59343c7ae56b3abc3 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:44 -0800 Subject: [PATCH 35/57] mm/kasan: Switch to using __pa_symbol and lm_alias __pa_symbol is the correct API to find the physical address of symbols. Switch to it to allow for debugging APIs to work correctly. Other functions such as p*d_populate may call __pa internally. Ensure that the address passed is in the linear region by calling lm_alias. 
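In short, the convention being adopted is (a minimal illustrative sketch, not lifted verbatim from the hunks below; it assumes only the __pa_symbol/lm_alias definitions introduced above and the existing kasan symbols):

	/* image symbols vs. linear-map addresses once DEBUG_VIRTUAL is enabled */
	phys_addr_t p = __pa_symbol(kasan_zero_page);   /* phys addr of a kernel image symbol  */
	void *linear  = lm_alias(kasan_zero_pte);       /* == __va(__pa_symbol(kasan_zero_pte)) */

	/*
	 * Populate helpers such as pmd_populate_kernel() may call __pa()
	 * internally, so they are handed the linear alias rather than the
	 * raw image symbol:
	 */
	pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
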
Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/kasan/kasan_init.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 3f9a41cf0ac6..31238dad85fb 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr, pte_t *pte = pte_offset_kernel(pmd, addr); pte_t zero_pte; - zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL); + zero_pte = pfn_pte(PFN_DOWN(__pa_symbol(kasan_zero_page)), PAGE_KERNEL); zero_pte = pte_wrprotect(zero_pte); while (addr + PAGE_SIZE <= end) { @@ -69,7 +70,7 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, next = pmd_addr_end(addr, end); if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } @@ -92,9 +93,9 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { pmd_t *pmd; - pud_populate(&init_mm, pud, kasan_zero_pmd); + pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } @@ -135,11 +136,11 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * puds,pmds, so pgd_populate(), pud_populate() * is noops. */ - pgd_populate(&init_mm, pgd, kasan_zero_pud); + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, kasan_zero_pmd); + pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } From 46f6236aa1c48e06b10a2fe0386ac4114b438622 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:45 -0800 Subject: [PATCH 36/57] mm/usercopy: Switch to using lm_alias The usercopy checking code currently calls __va(__pa(...)) to check for aliases on symbols. Switch to using lm_alias instead. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/usercopy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 3c8da0af9695..8345299e3e3b 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -108,13 +108,13 @@ static inline const char *check_kernel_text_object(const void *ptr, * __pa() is not just the reverse of __va(). This can be detected * and checked: */ - textlow_linear = (unsigned long)__va(__pa(textlow)); + textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) return NULL; /* Check the secondary mapping... 
*/ - texthigh_linear = (unsigned long)__va(__pa(texthigh)); + texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) return ""; From 1a08e3d9e0ac4577ba89dbdb38f593fe050f88fc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:46 -0800 Subject: [PATCH 37/57] drivers: firmware: psci: Use __pa_symbol for kernel symbol __pa_symbol is technically the macro that should be used for kernel symbols. Switch to this as a pre-requisite for DEBUG_VIRTUAL which will do bounds checking. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- drivers/firmware/psci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 6c60a5087caf..66a8793f3b37 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -383,7 +383,7 @@ static int psci_suspend_finisher(unsigned long index) u32 *state = __this_cpu_read(psci_power_state); return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); + __pa_symbol(cpu_resume)); } int psci_cpu_suspend_enter(unsigned long index) From c466bda60510eaac797e49db5d34555876d983ae Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Feb 2017 12:00:16 +0000 Subject: [PATCH 38/57] ARM: add CPU_THUMB_CAPABLE to indicate possible Thumb support Clean up arch/arm/mm/Kconfig a little to provide a symbol which indicates whether the CPU may support the Thumb instruction set. This gets rid of the growing dependencies on ARM_THUMB, and also gives us a useful Kconfig symbol for choosing the kuser code. Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/Kconfig | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index f68e8ec29447..ac395eca7dee 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -29,6 +29,7 @@ config CPU_ARM720T select CPU_COPY_V4WT if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WT if MMU help A 32-bit RISC processor with 8kByte Cache, Write Buffer and @@ -46,6 +47,7 @@ config CPU_ARM740T select CPU_CACHE_V4 select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help A 32-bit RISC processor with 8KB cache or 4KB variants, write buffer and MPU(Protection Unit) built around @@ -79,6 +81,7 @@ config CPU_ARM920T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM920T is licensed to be produced by numerous vendors, @@ -97,6 +100,7 @@ config CPU_ARM922T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM922T is a version of the ARM920T, but with smaller @@ -116,6 +120,7 @@ config CPU_ARM925T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM925T is a mix between the ARM920T and ARM926T, but with @@ -134,6 +139,7 @@ config CPU_ARM926T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help This is a variant of the ARM920. 
It has slightly different @@ -170,6 +176,7 @@ config CPU_ARM940T select CPU_CACHE_VIVT select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help ARM940T is a member of the ARM9TDMI family of general- purpose microprocessors with MPU and separate 4KB @@ -188,6 +195,7 @@ config CPU_ARM946E select CPU_CACHE_VIVT select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help ARM946E-S is a member of the ARM9E-S family of high- performance, 32-bit system-on-chip processor solutions. @@ -206,6 +214,7 @@ config CPU_ARM1020 select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1020 is the 32K cached version of the ARM10 processor, @@ -225,6 +234,7 @@ config CPU_ARM1020E select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # ARM1022E @@ -236,6 +246,7 @@ config CPU_ARM1022 select CPU_COPY_V4WB if MMU # can probably do better select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1022E is an implementation of the ARMv5TE architecture @@ -254,6 +265,7 @@ config CPU_ARM1026 select CPU_COPY_V4WB if MMU # can probably do better select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture @@ -302,6 +314,7 @@ config CPU_XSCALE select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # XScale Core Version 3 @@ -312,6 +325,7 @@ config CPU_XSC3 select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU select IO_36 @@ -324,6 +338,7 @@ config CPU_MOHAWK select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # Feroceon @@ -335,6 +350,7 @@ config CPU_FEROCEON select CPU_COPY_FEROCEON if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_FEROCEON if MMU config CPU_FEROCEON_OLD_ID @@ -367,6 +383,7 @@ config CPU_V6 select CPU_CP15_MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU # ARMv6k @@ -381,6 +398,7 @@ config CPU_V6K select CPU_CP15_MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU # ARMv7 @@ -396,6 +414,7 @@ config CPU_V7 select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 + select CPU_THUMB_CAPABLE select CPU_TLB_V7 if MMU # ARMv7M @@ -410,11 +429,17 @@ config CPU_V7M config CPU_THUMBONLY bool + select CPU_THUMB_CAPABLE # There are no CPUs available with MMU that don't implement an ARM ISA: depends on !MMU help Select this if your CPU doesn't support the 32 bit ARM instructions. +config CPU_THUMB_CAPABLE + bool + help + Select this if your CPU can support Thumb mode. + # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. 
config CPU_32v3 @@ -655,11 +680,7 @@ config ARCH_DMA_ADDR_T_64BIT config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ - CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ - CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ - CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ - CPU_V7 || CPU_FEROCEON || CPU_V7M + depends on CPU_THUMB_CAPABLE default y help Say Y if you want to include kernel support for running user space From 374d446d25d6271ee615952a3b7f123ba4983c35 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 13 Jan 2017 22:51:08 +0100 Subject: [PATCH 39/57] ARM: 8636/1: Cleanup sanity_check_meminfo The logic for sanity_check_meminfo has become difficult to follow. Clean up the code so it's more obvious what the code is actually trying to do. Additionally, meminfo is now removed so rename the function to better describe its purpose. Tested-by: Magnus Lilja Reviewed-by: Nicolas Pitre Signed-off-by: Laura Abbott Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 4 +-- arch/arm/mm/mmu.c | 66 +++++++++++++++-------------------------- arch/arm/mm/nommu.c | 8 ++--- 3 files changed, 30 insertions(+), 48 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 34e3f3c45634..8a8051cf57d1 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -81,7 +81,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); extern void early_paging_init(const struct machine_desc *); -extern void sanity_check_meminfo(void); +extern void adjust_lowmem_bounds(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -1093,7 +1093,7 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); xen_early_init(); efi_init(); - sanity_check_meminfo(); + adjust_lowmem_bounds(); arm_memblock_init(mdesc); early_ioremap_reset(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4001dd15818d..b8f70a3bb7f8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1152,13 +1152,11 @@ early_param("vmalloc", early_vmalloc); phys_addr_t arm_lowmem_limit __initdata = 0; -void __init sanity_check_meminfo(void) +void __init adjust_lowmem_bounds(void) { phys_addr_t memblock_limit = 0; - int highmem = 0; u64 vmalloc_limit; struct memblock_region *reg; - bool should_use_highmem = false; /* * Let's use our own (unoptimized) equivalent of __pa() that is @@ -1172,43 +1170,18 @@ void __init sanity_check_meminfo(void) for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; - phys_addr_t size_limit = reg->size; - if (reg->base >= vmalloc_limit) - highmem = 1; - else - size_limit = vmalloc_limit - reg->base; - - - if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { - - if (highmem) { - pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", - &block_start, &block_end); - memblock_remove(reg->base, reg->size); - should_use_highmem = true; - continue; - } - - if (reg->size > size_limit) { - phys_addr_t overlap_size = reg->size - size_limit; - - pr_notice("Truncating RAM at %pa-%pa", - &block_start, &block_end); - block_end = vmalloc_limit; - pr_cont(" to -%pa", &block_end); - memblock_remove(vmalloc_limit, overlap_size); - should_use_highmem = true; - } - } - - if (!highmem) { - if (block_end > arm_lowmem_limit) { - 
if (reg->size > size_limit) - arm_lowmem_limit = vmalloc_limit; - else - arm_lowmem_limit = block_end; - } + if (reg->base < vmalloc_limit) { + if (block_end > arm_lowmem_limit) + /* + * Compare as u64 to ensure vmalloc_limit does + * not get truncated. block_end should always + * fit in phys_addr_t so there should be no + * issue with assignment. + */ + arm_lowmem_limit = min_t(u64, + vmalloc_limit, + block_end); /* * Find the first non-pmd-aligned page, and point @@ -1233,9 +1206,6 @@ void __init sanity_check_meminfo(void) } } - if (should_use_highmem) - pr_notice("Consider using a HIGHMEM enabled kernel.\n"); - high_memory = __va(arm_lowmem_limit - 1) + 1; /* @@ -1248,6 +1218,18 @@ void __init sanity_check_meminfo(void) if (!memblock_limit) memblock_limit = arm_lowmem_limit; + if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + if (memblock_end_of_DRAM() > arm_lowmem_limit) { + phys_addr_t end = memblock_end_of_DRAM(); + + pr_notice("Ignoring RAM at %pa-%pa\n", + &memblock_limit, &end); + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + + memblock_remove(memblock_limit, end - memblock_limit); + } + } + memblock_set_current_limit(memblock_limit); } diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 2740967727e2..13a25d6282f8 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -85,7 +85,7 @@ static unsigned long irbar_read(void) } /* MPU initialisation functions */ -void __init sanity_check_meminfo_mpu(void) +void __init adjust_lowmem_bounds_mpu(void) { phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; @@ -274,7 +274,7 @@ void __init mpu_setup(void) } } #else -static void sanity_check_meminfo_mpu(void) {} +static void adjust_lowmem_bounds_mpu(void) {} static void __init mpu_setup(void) {} #endif /* CONFIG_ARM_MPU */ @@ -295,10 +295,10 @@ void __init arm_mm_memblock_reserve(void) #endif } -void __init sanity_check_meminfo(void) +void __init adjust_lowmem_bounds(void) { phys_addr_t end; - sanity_check_meminfo_mpu(); + adjust_lowmem_bounds_mpu(); end = memblock_end_of_DRAM(); high_memory = __va(end - 1) + 1; memblock_set_current_limit(end); From 985626564eedc470ce2866e53938303368ad41b7 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 13 Jan 2017 22:51:45 +0100 Subject: [PATCH 40/57] ARM: 8637/1: Adjust memory boundaries after reservations adjust_lowmem_bounds is responsible for setting up the boundary for lowmem/highmem. This needs to be setup before memblock reservations can occur. At the time memblock reservations can occur, memory can also be removed from the system. The lowmem/highmem boundary and end of memory may be affected by this but it is currently not recalculated. On some systems this may be harmless, on others this may result in incorrect ranges being passed to the main memory allocator. Correct this by recalculating the lowmem/highmem boundary after all reservations have been made. 
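The resulting ordering in setup_arch() is, in simplified form (unrelated calls and error handling omitted):

	adjust_lowmem_bounds();     /* establish the lowmem/highmem boundary       */
	arm_memblock_init(mdesc);   /* reservations here may remove memory         */
	adjust_lowmem_bounds();     /* recompute the boundary after any removals   */
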
Tested-by: Magnus Lilja Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 6 ++++++ arch/arm/mm/mmu.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8a8051cf57d1..f4e54503afa9 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1093,8 +1093,14 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); xen_early_init(); efi_init(); + /* + * Make sure the calculation for lowmem/highmem is set appropriately + * before reserving/allocating any mmeory + */ adjust_lowmem_bounds(); arm_memblock_init(mdesc); + /* Memory may have been removed so recalculate the bounds. */ + adjust_lowmem_bounds(); early_ioremap_reset(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b8f70a3bb7f8..5cbfd9f86412 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1157,6 +1157,7 @@ void __init adjust_lowmem_bounds(void) phys_addr_t memblock_limit = 0; u64 vmalloc_limit; struct memblock_region *reg; + phys_addr_t lowmem_limit = 0; /* * Let's use our own (unoptimized) equivalent of __pa() that is @@ -1172,14 +1173,14 @@ void __init adjust_lowmem_bounds(void) phys_addr_t block_end = reg->base + reg->size; if (reg->base < vmalloc_limit) { - if (block_end > arm_lowmem_limit) + if (block_end > lowmem_limit) /* * Compare as u64 to ensure vmalloc_limit does * not get truncated. block_end should always * fit in phys_addr_t so there should be no * issue with assignment. */ - arm_lowmem_limit = min_t(u64, + lowmem_limit = min_t(u64, vmalloc_limit, block_end); @@ -1200,12 +1201,14 @@ void __init adjust_lowmem_bounds(void) if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; else if (!IS_ALIGNED(block_end, PMD_SIZE)) - memblock_limit = arm_lowmem_limit; + memblock_limit = lowmem_limit; } } } + arm_lowmem_limit = lowmem_limit; + high_memory = __va(arm_lowmem_limit - 1) + 1; /* From 4e23612bb09809ef89eb396fd5f568392e0f1ec7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:57:03 +0100 Subject: [PATCH 41/57] ARM: 8638/1: mtd: lart: Rename partition defines to be prefixed with PART_ In preparation for defining KERNEL_START on ARM, rename KERNEL_START to PART_KERNEL_START, and to be consistent, do this for all partition-related constants. 
Acked-by: Boris Brezillon Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- drivers/mtd/devices/lart.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c index 82bd00af5cc3..268aae45b514 100644 --- a/drivers/mtd/devices/lart.c +++ b/drivers/mtd/devices/lart.c @@ -75,18 +75,18 @@ static char module_name[] = "lart"; /* blob */ #define NUM_BLOB_BLOCKS FLASH_NUMBLOCKS_16m_PARAM -#define BLOB_START 0x00000000 -#define BLOB_LEN (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM) +#define PART_BLOB_START 0x00000000 +#define PART_BLOB_LEN (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM) /* kernel */ #define NUM_KERNEL_BLOCKS 7 -#define KERNEL_START (BLOB_START + BLOB_LEN) -#define KERNEL_LEN (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN) +#define PART_KERNEL_START (PART_BLOB_START + PART_BLOB_LEN) +#define PART_KERNEL_LEN (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN) /* initial ramdisk */ #define NUM_INITRD_BLOCKS 24 -#define INITRD_START (KERNEL_START + KERNEL_LEN) -#define INITRD_LEN (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN) +#define PART_INITRD_START (PART_KERNEL_START + PART_KERNEL_LEN) +#define PART_INITRD_LEN (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN) /* * See section 4.0 in "3 Volt Fast Boot Block Flash Memory" Intel Datasheet @@ -587,20 +587,20 @@ static struct mtd_partition lart_partitions[] = { /* blob */ { .name = "blob", - .offset = BLOB_START, - .size = BLOB_LEN, + .offset = PART_BLOB_START, + .size = PART_BLOB_LEN, }, /* kernel */ { .name = "kernel", - .offset = KERNEL_START, /* MTDPART_OFS_APPEND */ - .size = KERNEL_LEN, + .offset = PART_KERNEL_START, /* MTDPART_OFS_APPEND */ + .size = PART_KERNEL_LEN, }, /* initial ramdisk / file system */ { .name = "file system", - .offset = INITRD_START, /* MTDPART_OFS_APPEND */ - .size = INITRD_LEN, /* MTDPART_SIZ_FULL */ + .offset = PART_INITRD_START, /* MTDPART_OFS_APPEND */ + .size = PART_INITRD_LEN, /* MTDPART_SIZ_FULL */ } }; #define NUM_PARTITIONS ARRAY_SIZE(lart_partitions) From a09975bf6c756e4555a95258ff4b2286dcfddc4e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:57:40 +0100 Subject: [PATCH 42/57] ARM: 8639/1: Define KERNEL_START and KERNEL_END In preparation for adding CONFIG_DEBUG_VIRTUAL support, define a set of common constants: KERNEL_START and KERNEL_END which abstract CONFIG_XIP_KERNEL vs. !CONFIG_XIP_KERNEL. Update the code where relevant. Acked-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 7 +++++++ arch/arm/mm/init.c | 7 ++----- arch/arm/mm/mmu.c | 6 +----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 76cbd9c674df..bee7511c5098 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -111,6 +111,13 @@ #endif /* !CONFIG_MMU */ +#ifdef CONFIG_XIP_KERNEL +#define KERNEL_START _sdata +#else +#define KERNEL_START _stext +#endif +#define KERNEL_END _end + /* * We fix the TCM memories max 32 KiB ITCM resp DTCM at these * locations diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 370581aeb871..4127f578086c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -230,11 +230,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) void __init arm_memblock_init(const struct machine_desc *mdesc) { /* Register the kernel text, kernel data and initrd with memblock. 
*/ -#ifdef CONFIG_XIP_KERNEL - memblock_reserve(__pa(_sdata), _end - _sdata); -#else - memblock_reserve(__pa(_stext), _end - _stext); -#endif + memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); + #ifdef CONFIG_BLK_DEV_INITRD /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5cbfd9f86412..4e016d7f37b3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1422,11 +1422,7 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; -#ifdef CONFIG_XIP_KERNEL - phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE); -#else - phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); -#endif + phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE); phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ From e377cd8221ebbe0b517861aa3d823bb42f9abbd4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:59:00 +0100 Subject: [PATCH 43/57] ARM: 8640/1: Add support for CONFIG_DEBUG_VIRTUAL x86 has an option: CONFIG_DEBUG_VIRTUAL to do additional checks on virt_to_phys calls. The goal is to catch users who are calling virt_to_phys on non-linear addresses immediately. This includes caller using __virt_to_phys() on image addresses instead of __pa_symbol(). This is a generally useful debug feature to spot bad code (particulary in drivers). Acked-by: Russell King Acked-by: Laura Abbott Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/memory.h | 15 +++++++-- arch/arm/mm/Makefile | 1 + arch/arm/mm/physaddr.c | 57 +++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/physaddr.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5fab553fd03a..4700294f4e09 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2,6 +2,7 @@ config ARM bool default y select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index bee7511c5098..c30d0d82a105 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -213,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end; : "r" (x), "I" (__PV_BITS_31_24) \ : "cc") -static inline phys_addr_t __virt_to_phys(unsigned long x) +static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { phys_addr_t t; @@ -245,7 +245,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) #define PHYS_OFFSET PLAT_PHYS_OFFSET #define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) -static inline phys_addr_t __virt_to_phys(unsigned long x) +static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET; } @@ -261,6 +261,16 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ PHYS_PFN_OFFSET) +#define __pa_symbol_nodebug(x) __virt_to_phys_nodebug((x)) + +#ifdef CONFIG_DEBUG_VIRTUAL +extern phys_addr_t __virt_to_phys(unsigned long x); +extern phys_addr_t __phys_addr_symbol(unsigned long x); +#else +#define __virt_to_phys(x) __virt_to_phys_nodebug(x) +#define __phys_addr_symbol(x) __pa_symbol_nodebug(x) +#endif + /* * These are *only* 
valid on the kernel direct mapped RAM memory. * Note: Drivers should NOT use these. They are the wrong @@ -283,6 +293,7 @@ static inline void *phys_to_virt(phys_addr_t x) * Drivers should NOT use these either. */ #define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index e8698241ece9..b3dea80715b4 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -14,6 +14,7 @@ endif obj-$(CONFIG_ARM_PTDUMP) += dump.o obj-$(CONFIG_MODULES) += proc-syms.o +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c new file mode 100644 index 000000000000..02e60f495608 --- /dev/null +++ b/arch/arm/mm/physaddr.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mm.h" + +static inline bool __virt_addr_valid(unsigned long x) +{ + /* + * high_memory does not get immediately defined, and there + * are early callers of __pa() against PAGE_OFFSET + */ + if (!high_memory && x >= PAGE_OFFSET) + return true; + + if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory) + return true; + + /* + * MAX_DMA_ADDRESS is a virtual address that may not correspond to an + * actual physical address. Enough code relies on __pa(MAX_DMA_ADDRESS) + * that we just need to work around it and always return true. + */ + if (x == MAX_DMA_ADDRESS) + return true; + + return false; +} + +phys_addr_t __virt_to_phys(unsigned long x) +{ + WARN(!__virt_addr_valid(x), + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, (void *)x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START || + x > (unsigned long)KERNEL_END); + + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); From 64fc2a947a9873700929ec0ef02b4654a04e0476 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:59:29 +0100 Subject: [PATCH 44/57] ARM: 8641/1: treewide: Replace uses of virt_to_phys with __pa_symbol All low-level PM/SMP code using virt_to_phys() should actually use __pa_symbol() against kernel symbols. Update code where relevant to move away from virt_to_phys(). 
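For reference, the conversion pattern applied throughout this patch is roughly the following sketch. It is not lifted from any single platform in the diff: boot_base and BOOT_ADDR_REG are made-up names, and the extern declaration stands in for however a given mach- directory reaches secondary_startup. The point is that addresses of kernel image symbols (SMP entry points, resume stubs, _stext and friends) must come from __pa_symbol(), while virt_to_phys()/__pa() remain for linear-map (lowmem) addresses such as page allocator or kmalloc() memory, so the CONFIG_DEBUG_VIRTUAL checks added in the previous patch do not fire.

#include <linux/io.h>
#include <asm/memory.h>

extern void secondary_startup(void);	/* kernel image symbol */

static void __iomem *boot_base;		/* hypothetical boot register block */
#define BOOT_ADDR_REG	0x0		/* hypothetical register offset */

static void example_set_boot_addr(void)
{
	/*
	 * secondary_startup lives in the kernel image, not in the linear
	 * map, so its physical address must come from __pa_symbol();
	 * virt_to_phys(secondary_startup) would now trip the
	 * CONFIG_DEBUG_VIRTUAL WARN() in __virt_to_phys().
	 */
	writel_relaxed(__pa_symbol(secondary_startup),
		       boot_base + BOOT_ADDR_REG);
}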
Acked-by: Russell King Reviewed-by: Laura Abbott Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/common/mcpm_entry.c | 12 ++++++------ arch/arm/mach-alpine/platsmp.c | 2 +- arch/arm/mach-axxia/platsmp.c | 2 +- arch/arm/mach-bcm/bcm63xx_smp.c | 2 +- arch/arm/mach-bcm/platsmp-brcmstb.c | 2 +- arch/arm/mach-bcm/platsmp.c | 4 ++-- arch/arm/mach-berlin/platsmp.c | 2 +- arch/arm/mach-exynos/firmware.c | 4 ++-- arch/arm/mach-exynos/mcpm-exynos.c | 2 +- arch/arm/mach-exynos/platsmp.c | 4 ++-- arch/arm/mach-exynos/pm.c | 6 +++--- arch/arm/mach-exynos/suspend.c | 6 +++--- arch/arm/mach-hisi/platmcpm.c | 2 +- arch/arm/mach-hisi/platsmp.c | 6 +++--- arch/arm/mach-imx/platsmp.c | 2 +- arch/arm/mach-imx/pm-imx6.c | 2 +- arch/arm/mach-imx/src.c | 2 +- arch/arm/mach-mediatek/platsmp.c | 2 +- arch/arm/mach-mvebu/pm.c | 2 +- arch/arm/mach-mvebu/pmsu.c | 2 +- arch/arm/mach-mvebu/system-controller.c | 2 +- arch/arm/mach-omap2/control.c | 8 ++++---- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 12 ++++++------ arch/arm/mach-omap2/omap-smp.c | 4 ++-- arch/arm/mach-prima2/platsmp.c | 2 +- arch/arm/mach-prima2/pm.c | 2 +- arch/arm/mach-pxa/palmz72.c | 2 +- arch/arm/mach-pxa/pxa25x.c | 2 +- arch/arm/mach-pxa/pxa27x.c | 2 +- arch/arm/mach-pxa/pxa3xx.c | 2 +- arch/arm/mach-realview/platsmp-dt.c | 2 +- arch/arm/mach-rockchip/platsmp.c | 4 ++-- arch/arm/mach-rockchip/pm.c | 2 +- arch/arm/mach-s3c24xx/mach-jive.c | 2 +- arch/arm/mach-s3c24xx/pm-s3c2410.c | 2 +- arch/arm/mach-s3c24xx/pm-s3c2416.c | 2 +- arch/arm/mach-s3c64xx/pm.c | 2 +- arch/arm/mach-s5pv210/pm.c | 2 +- arch/arm/mach-sa1100/pm.c | 2 +- arch/arm/mach-shmobile/platsmp-apmu.c | 6 +++--- arch/arm/mach-shmobile/platsmp-scu.c | 4 ++-- arch/arm/mach-socfpga/platsmp.c | 4 ++-- arch/arm/mach-spear/platsmp.c | 2 +- arch/arm/mach-sti/platsmp.c | 2 +- arch/arm/mach-sunxi/platsmp.c | 4 ++-- arch/arm/mach-tango/platsmp.c | 2 +- arch/arm/mach-tango/pm.c | 2 +- arch/arm/mach-tegra/reset.c | 4 ++-- arch/arm/mach-ux500/platsmp.c | 2 +- arch/arm/mach-vexpress/dcscb.c | 2 +- arch/arm/mach-vexpress/platsmp.c | 2 +- arch/arm/mach-vexpress/tc2_pm.c | 4 ++-- arch/arm/mach-zx/platsmp.c | 4 ++-- arch/arm/mach-zynq/platsmp.c | 2 +- 54 files changed, 86 insertions(+), 86 deletions(-) diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index a923524d1040..cf062472e07b 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -144,7 +144,7 @@ extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) { - unsigned long val = ptr ? virt_to_phys(ptr) : 0; + unsigned long val = ptr ? __pa_symbol(ptr) : 0; mcpm_entry_vectors[cluster][cpu] = val; sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } @@ -299,8 +299,8 @@ void mcpm_cpu_power_down(void) * the kernel as if the power_up method just had deasserted reset * on the CPU. 
*/ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); + phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); + phys_reset(__pa_symbol(mcpm_entry_point)); /* should never get here */ BUG(); @@ -388,8 +388,8 @@ static int __init nocache_trampoline(unsigned long _arg) __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); __mcpm_cpu_down(cpu, cluster); - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); + phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); + phys_reset(__pa_symbol(mcpm_entry_point)); BUG(); } @@ -449,7 +449,7 @@ int __init mcpm_sync_init( sync_cache_w(&mcpm_sync); if (power_up_setup) { - mcpm_power_up_setup_phys = virt_to_phys(power_up_setup); + mcpm_power_up_setup_phys = __pa_symbol(power_up_setup); sync_cache_w(&mcpm_power_up_setup_phys); } diff --git a/arch/arm/mach-alpine/platsmp.c b/arch/arm/mach-alpine/platsmp.c index dd77ea25e7ca..6dc6d491f88a 100644 --- a/arch/arm/mach-alpine/platsmp.c +++ b/arch/arm/mach-alpine/platsmp.c @@ -27,7 +27,7 @@ static int alpine_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t addr; - addr = virt_to_phys(secondary_startup); + addr = __pa_symbol(secondary_startup); if (addr > (phys_addr_t)(uint32_t)(-1)) { pr_err("FAIL: resume address over 32bit (%pa)", &addr); diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index ffbd71d45008..502e3df69f69 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -25,7 +25,7 @@ static void write_release_addr(u32 release_phys) { u32 *virt = (u32 *) phys_to_virt(release_phys); - writel_relaxed(virt_to_phys(secondary_startup), virt); + writel_relaxed(__pa_symbol(secondary_startup), virt); /* Make sure this store is visible to other CPUs */ smp_wmb(); __cpuc_flush_dcache_area(virt, sizeof(u32)); diff --git a/arch/arm/mach-bcm/bcm63xx_smp.c b/arch/arm/mach-bcm/bcm63xx_smp.c index 9b6727ed68cd..f5fb10b4376f 100644 --- a/arch/arm/mach-bcm/bcm63xx_smp.c +++ b/arch/arm/mach-bcm/bcm63xx_smp.c @@ -135,7 +135,7 @@ static int bcm63138_smp_boot_secondary(unsigned int cpu, } /* Write the secondary init routine to the BootLUT reset vector */ - val = virt_to_phys(secondary_startup); + val = __pa_symbol(secondary_startup); writel_relaxed(val, bootlut_base + BOOTLUT_RESET_VECT); /* Power up the core, will jump straight to its reset vector when we diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index 40dc8448445e..12379960e982 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -151,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); + cpu_set_boot_addr(cpu, __pa_symbol(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 3ac3a9bc663c..582886d0d02f 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -116,7 +116,7 @@ static int nsp_write_lut(unsigned int cpu) return -ENOMEM; } - secondary_startup_phy = virt_to_phys(secondary_startup); + secondary_startup_phy = __pa_symbol(secondary_startup); BUG_ON(secondary_startup_phy > (phys_addr_t)U32_MAX); writel_relaxed(secondary_startup_phy, sku_rom_lut); @@ -189,7 +189,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct 
*idle) * Secondary cores will start in secondary_startup(), * defined in "arch/arm/kernel/head.S" */ - boot_func = virt_to_phys(secondary_startup); + boot_func = __pa_symbol(secondary_startup); BUG_ON(boot_func & BOOT_ADDR_CPUID_MASK); BUG_ON(boot_func > (phys_addr_t)U32_MAX); diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 93f90688db18..1167b0ed92c8 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -92,7 +92,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. */ - writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); + writel(__pa_symbol(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index fd6da5419b51..e81a78b125d9 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -41,7 +41,7 @@ static int exynos_do_idle(unsigned long mode) case FW_DO_IDLE_AFTR: if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_save_cp15(); - writel_relaxed(virt_to_phys(exynos_cpu_resume_ns), + writel_relaxed(__pa_symbol(exynos_cpu_resume_ns), sysram_ns_base_addr + 0x24); writel_relaxed(EXYNOS_AFTR_MAGIC, sysram_ns_base_addr + 0x20); if (soc_is_exynos3250()) { @@ -135,7 +135,7 @@ static int exynos_suspend(void) exynos_save_cp15(); writel(EXYNOS_SLEEP_MAGIC, sysram_ns_base_addr + EXYNOS_BOOT_FLAG); - writel(virt_to_phys(exynos_cpu_resume_ns), + writel(__pa_symbol(exynos_cpu_resume_ns), sysram_ns_base_addr + EXYNOS_BOOT_ADDR); return cpu_suspend(0, exynos_cpu_suspend); diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index f086bf615b29..214a9cfa92e9 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -221,7 +221,7 @@ static void exynos_mcpm_setup_entry_point(void) */ __raw_writel(0xe59f0000, ns_sram_base_addr); /* ldr r0, [pc, #0] */ __raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx r0 */ - __raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8); + __raw_writel(__pa_symbol(mcpm_entry_point), ns_sram_base_addr + 8); } static struct syscore_ops exynos_mcpm_syscore_ops = { diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 98ffe1e62ad5..9f4949f7ed88 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -353,7 +353,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) smp_rmb(); - boot_addr = virt_to_phys(exynos4_secondary_startup); + boot_addr = __pa_symbol(exynos4_secondary_startup); ret = exynos_set_boot_addr(core_id, boot_addr); if (ret) @@ -443,7 +443,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) mpidr = cpu_logical_map(i); core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - boot_addr = virt_to_phys(exynos4_secondary_startup); + boot_addr = __pa_symbol(exynos4_secondary_startup); ret = exynos_set_boot_addr(core_id, boot_addr); if (ret) diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 487295f4a56b..1a7e5b5d08d8 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -132,7 +132,7 @@ static void exynos_set_wakeupmask(long mask) static void exynos_cpu_set_boot_vector(long flags) { - writel_relaxed(virt_to_phys(exynos_cpu_resume), + writel_relaxed(__pa_symbol(exynos_cpu_resume), exynos_boot_vector_addr()); writel_relaxed(flags, 
exynos_boot_vector_flag()); } @@ -238,7 +238,7 @@ static int exynos_cpu0_enter_aftr(void) abort: if (cpu_online(1)) { - unsigned long boot_addr = virt_to_phys(exynos_cpu_resume); + unsigned long boot_addr = __pa_symbol(exynos_cpu_resume); /* * Set the boot vector to something non-zero @@ -330,7 +330,7 @@ cpu1_aborted: static void exynos_pre_enter_aftr(void) { - unsigned long boot_addr = virt_to_phys(exynos_cpu_resume); + unsigned long boot_addr = __pa_symbol(exynos_cpu_resume); (void)exynos_set_boot_addr(1, boot_addr); } diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 06332f626565..97765be2cc12 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -344,7 +344,7 @@ static void exynos_pm_prepare(void) exynos_pm_enter_sleep_mode(); /* ensure at least INFORM0 has the resume address */ - pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0); } static void exynos3250_pm_prepare(void) @@ -361,7 +361,7 @@ static void exynos3250_pm_prepare(void) exynos_pm_enter_sleep_mode(); /* ensure at least INFORM0 has the resume address */ - pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0); } static void exynos5420_pm_prepare(void) @@ -386,7 +386,7 @@ static void exynos5420_pm_prepare(void) /* ensure at least INFORM0 has the resume address */ if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) - pmu_raw_writel(virt_to_phys(mcpm_entry_point), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(mcpm_entry_point), S5P_INFORM0); tmp = pmu_raw_readl(EXYNOS5_ARM_L2_OPTION); tmp &= ~EXYNOS5_USE_RETENTION; diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 4b653a8cb75c..a6c117622d67 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -327,7 +327,7 @@ static int __init hip04_smp_init(void) */ writel_relaxed(hip04_boot_method[0], relocation); writel_relaxed(0xa5a5a5a5, relocation + 4); /* magic number */ - writel_relaxed(virt_to_phys(secondary_startup), relocation + 8); + writel_relaxed(__pa_symbol(secondary_startup), relocation + 8); writel_relaxed(0, relocation + 12); iounmap(relocation); diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index e1d67648d5d0..91bb02dec20f 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -28,7 +28,7 @@ void hi3xxx_set_cpu_jump(int cpu, void *jump_addr) cpu = cpu_logical_map(cpu); if (!cpu || !ctrl_base) return; - writel_relaxed(virt_to_phys(jump_addr), ctrl_base + ((cpu - 1) << 2)); + writel_relaxed(__pa_symbol(jump_addr), ctrl_base + ((cpu - 1) << 2)); } int hi3xxx_get_cpu_jump(int cpu) @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(secondary_startup); + jumpaddr = __pa_symbol(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(secondary_startup); + jumpaddr = __pa_symbol(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/platsmp.c b/arch/arm/mach-imx/platsmp.c index 711dbbd5badd..c2d1b329fba1 100644 --- 
a/arch/arm/mach-imx/platsmp.c +++ b/arch/arm/mach-imx/platsmp.c @@ -117,7 +117,7 @@ static void __init ls1021a_smp_prepare_cpus(unsigned int max_cpus) dcfg_base = of_iomap(np, 0); BUG_ON(!dcfg_base); - paddr = virt_to_phys(secondary_startup); + paddr = __pa_symbol(secondary_startup); writel_relaxed(cpu_to_be32(paddr), dcfg_base + DCFG_CCSR_SCRATCHRW1); iounmap(dcfg_base); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 1515e498d348..e61b1d1027e1 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -499,7 +499,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) memset(suspend_ocram_base, 0, sizeof(*pm_info)); pm_info = suspend_ocram_base; pm_info->pbase = ocram_pbase; - pm_info->resume_addr = virt_to_phys(v7_cpu_resume); + pm_info->resume_addr = __pa_symbol(v7_cpu_resume); pm_info->pm_info_size = sizeof(*pm_info); /* diff --git a/arch/arm/mach-imx/src.c b/arch/arm/mach-imx/src.c index 70b083fe934a..495d85d0fe7e 100644 --- a/arch/arm/mach-imx/src.c +++ b/arch/arm/mach-imx/src.c @@ -99,7 +99,7 @@ void imx_enable_cpu(int cpu, bool enable) void imx_set_cpu_jump(int cpu, void *jump_addr) { cpu = cpu_logical_map(cpu); - writel_relaxed(virt_to_phys(jump_addr), + writel_relaxed(__pa_symbol(jump_addr), src_base + SRC_GPR1 + cpu * 8); } diff --git a/arch/arm/mach-mediatek/platsmp.c b/arch/arm/mach-mediatek/platsmp.c index b821e34474b6..726eb69bb655 100644 --- a/arch/arm/mach-mediatek/platsmp.c +++ b/arch/arm/mach-mediatek/platsmp.c @@ -122,7 +122,7 @@ static void __init __mtk_smp_prepare_cpus(unsigned int max_cpus, int trustzone) * write the address of slave startup address into the system-wide * jump register */ - writel_relaxed(virt_to_phys(secondary_startup_arm), + writel_relaxed(__pa_symbol(secondary_startup_arm), mtk_smp_base + mtk_smp_info->jump_reg); } diff --git a/arch/arm/mach-mvebu/pm.c b/arch/arm/mach-mvebu/pm.c index 2990c5269b18..c487be61d6d8 100644 --- a/arch/arm/mach-mvebu/pm.c +++ b/arch/arm/mach-mvebu/pm.c @@ -110,7 +110,7 @@ static void mvebu_pm_store_armadaxp_bootinfo(u32 *store_addr) { phys_addr_t resume_pc; - resume_pc = virt_to_phys(armada_370_xp_cpu_resume); + resume_pc = __pa_symbol(armada_370_xp_cpu_resume); /* * The bootloader expects the first two words to be a magic diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index f39bd51bce18..27a78c80e5b1 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -112,7 +112,7 @@ static const struct of_device_id of_pmsu_table[] = { void mvebu_pmsu_set_cpu_boot_addr(int hw_cpu, void *boot_addr) { - writel(virt_to_phys(boot_addr), pmsu_mp_base + + writel(__pa_symbol(boot_addr), pmsu_mp_base + PMSU_BOOT_ADDR_REDIRECT_OFFSET(hw_cpu)); } diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index 76cbc82a7407..04d9ebe6a90a 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -153,7 +153,7 @@ void mvebu_system_controller_set_cpu_boot_addr(void *boot_addr) if (of_machine_is_compatible("marvell,armada375")) mvebu_armada375_smp_wa_init(); - writel(virt_to_phys(boot_addr), system_controller_base + + writel(__pa_symbol(boot_addr), system_controller_base + mvebu_sc->resume_boot_addr); } #endif diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c index 1662071bb2cc..bd8089ff929f 100644 --- a/arch/arm/mach-omap2/control.c +++ b/arch/arm/mach-omap2/control.c @@ -315,15 +315,15 @@ void 
omap3_save_scratchpad_contents(void) scratchpad_contents.boot_config_ptr = 0x0; if (cpu_is_omap3630()) scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore_3630); + __pa_symbol(omap3_restore_3630); else if (omap_rev() != OMAP3430_REV_ES3_0 && omap_rev() != OMAP3430_REV_ES3_1 && omap_rev() != OMAP3430_REV_ES3_1_2) scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore); + __pa_symbol(omap3_restore); else scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore_es3); + __pa_symbol(omap3_restore_es3); if (omap_type() == OMAP2_DEVICE_TYPE_GP) scratchpad_contents.secure_ram_restore_ptr = 0x0; @@ -395,7 +395,7 @@ void omap3_save_scratchpad_contents(void) sdrc_block_contents.flags = 0x0; sdrc_block_contents.block_size = 0x0; - arm_context_addr = virt_to_phys(omap3_arm_context); + arm_context_addr = __pa_symbol(omap3_arm_context); /* Copy all the contents to the scratchpad location */ scratchpad_address = OMAP2_L4_IO_ADDRESS(OMAP343X_SCRATCHPAD); diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 7d62ad48c7c9..113ab2dd2ee9 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -273,7 +273,7 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) cpu_clear_prev_logic_pwrst(cpu); pwrdm_set_next_pwrst(pm_info->pwrdm, power_state); pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state); - set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.resume)); + set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.resume)); omap_pm_ops.scu_prepare(cpu, power_state); l2x0_pwrst_prepare(cpu, save_state); @@ -325,7 +325,7 @@ int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state) pwrdm_clear_all_prev_pwrst(pm_info->pwrdm); pwrdm_set_next_pwrst(pm_info->pwrdm, power_state); - set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.hotplug_restart)); + set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.hotplug_restart)); omap_pm_ops.scu_prepare(cpu, power_state); /* @@ -467,13 +467,13 @@ void __init omap4_mpuss_early_init(void) sar_base = omap4_get_sar_ram_base(); if (cpu_is_omap443x()) - startup_pa = virt_to_phys(omap4_secondary_startup); + startup_pa = __pa_symbol(omap4_secondary_startup); else if (cpu_is_omap446x()) - startup_pa = virt_to_phys(omap4460_secondary_startup); + startup_pa = __pa_symbol(omap4460_secondary_startup); else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE) - startup_pa = virt_to_phys(omap5_secondary_hyp_startup); + startup_pa = __pa_symbol(omap5_secondary_hyp_startup); else - startup_pa = virt_to_phys(omap5_secondary_startup); + startup_pa = __pa_symbol(omap5_secondary_startup); if (cpu_is_omap44xx()) writel_relaxed(startup_pa, sar_base + diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index b4de3da6dffa..003353b0b794 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -316,9 +316,9 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus) * A barrier is added to ensure that write buffer is drained */ if (omap_secure_apis_support()) - omap_auxcoreboot_addr(virt_to_phys(cfg.startup_addr)); + omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr)); else - writel_relaxed(virt_to_phys(cfg.startup_addr), + writel_relaxed(__pa_symbol(cfg.startup_addr), base + OMAP_AUX_CORE_BOOT_1); } diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 0875b99add18..75ef5d4be554 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c 
@@ -65,7 +65,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * waiting for. This would wake up the secondary core from WFE */ #define SIRFSOC_CPU1_JUMPADDR_OFFSET 0x2bc - __raw_writel(virt_to_phys(sirfsoc_secondary_startup), + __raw_writel(__pa_symbol(sirfsoc_secondary_startup), clk_base + SIRFSOC_CPU1_JUMPADDR_OFFSET); #define SIRFSOC_CPU1_WAKEMAGIC_OFFSET 0x2b8 diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c index 83e94c95e314..b0bcf1ff02dd 100644 --- a/arch/arm/mach-prima2/pm.c +++ b/arch/arm/mach-prima2/pm.c @@ -54,7 +54,7 @@ static void sirfsoc_set_sleep_mode(u32 mode) static int sirfsoc_pre_suspend_power_off(void) { - u32 wakeup_entry = virt_to_phys(cpu_resume); + u32 wakeup_entry = __pa_symbol(cpu_resume); sirfsoc_rtc_iobrg_writel(wakeup_entry, sirfsoc_pwrc_base + SIRFSOC_PWRC_SCRATCH_PAD1); diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 9c308de158c6..29630061e700 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -249,7 +249,7 @@ static int palmz72_pm_suspend(void) store_ptr = *PALMZ72_SAVE_DWORD; /* Setting PSPR to a proper value */ - PSPR = virt_to_phys(&palmz72_resume_info); + PSPR = __pa_symbol(&palmz72_resume_info); return 0; } diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index c725baf119e1..ba431fad5c47 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -85,7 +85,7 @@ static void pxa25x_cpu_pm_enter(suspend_state_t state) static int pxa25x_cpu_pm_prepare(void) { /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); return 0; } diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index c0185c5c5a08..9b69be4e9fe3 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -168,7 +168,7 @@ static int pxa27x_cpu_pm_valid(suspend_state_t state) static int pxa27x_cpu_pm_prepare(void) { /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); return 0; } diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 87acc96388c7..0cc9f124c9ac 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -123,7 +123,7 @@ static void pxa3xx_cpu_pm_suspend(void) PSPR = 0x5c014000; /* overwrite with the resume address */ - *p = virt_to_phys(cpu_resume); + *p = __pa_symbol(cpu_resume); cpu_suspend(0, pxa3xx_finish_suspend); diff --git a/arch/arm/mach-realview/platsmp-dt.c b/arch/arm/mach-realview/platsmp-dt.c index 70ca99eb52c6..c242423bf8db 100644 --- a/arch/arm/mach-realview/platsmp-dt.c +++ b/arch/arm/mach-realview/platsmp-dt.c @@ -76,7 +76,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus) } /* Put the boot address in this magic register */ regmap_write(map, REALVIEW_SYS_FLAGSSET_OFFSET, - virt_to_phys(versatile_secondary_startup)); + __pa_symbol(versatile_secondary_startup)); } static const struct smp_operations realview_dt_smp_ops __initconst = { diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 4d827a069d49..3abafdbdd7f4 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -156,7 +156,7 @@ static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle) */ mdelay(1); /* ensure the cpus other than cpu0 to startup */ - writel(virt_to_phys(secondary_startup), sram_base_addr + 8); + writel(__pa_symbol(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr 
+ 4); dsb_sev(); } @@ -195,7 +195,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(secondary_startup); + rockchip_boot_fn = __pa_symbol(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index bee8c8051929..0592534e0b88 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -62,7 +62,7 @@ static inline u32 rk3288_l2_config(void) static void rk3288_config_bootdata(void) { rkpm_bootdata_cpusp = rk3288_bootram_phy + (SZ_4K - 8); - rkpm_bootdata_cpu_code = virt_to_phys(cpu_resume); + rkpm_bootdata_cpu_code = __pa_symbol(cpu_resume); rkpm_bootdata_l2ctlr_f = 1; rkpm_bootdata_l2ctlr = rk3288_l2_config(); diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 895aca225952..f5b5c49b56ac 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -484,7 +484,7 @@ static int jive_pm_suspend(void) * correct address to resume from. */ __raw_writel(0x2BED, S3C2412_INFORM0); - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1); return 0; } diff --git a/arch/arm/mach-s3c24xx/pm-s3c2410.c b/arch/arm/mach-s3c24xx/pm-s3c2410.c index 20e481d8a33a..a4588daeddb0 100644 --- a/arch/arm/mach-s3c24xx/pm-s3c2410.c +++ b/arch/arm/mach-s3c24xx/pm-s3c2410.c @@ -45,7 +45,7 @@ static void s3c2410_pm_prepare(void) { /* ensure at least GSTATUS3 has the resume address */ - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2410_GSTATUS3); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2410_GSTATUS3); S3C_PMDBG("GSTATUS3 0x%08x\n", __raw_readl(S3C2410_GSTATUS3)); S3C_PMDBG("GSTATUS4 0x%08x\n", __raw_readl(S3C2410_GSTATUS4)); diff --git a/arch/arm/mach-s3c24xx/pm-s3c2416.c b/arch/arm/mach-s3c24xx/pm-s3c2416.c index c0e328e37bd6..b5bbf0d5985c 100644 --- a/arch/arm/mach-s3c24xx/pm-s3c2416.c +++ b/arch/arm/mach-s3c24xx/pm-s3c2416.c @@ -48,7 +48,7 @@ static void s3c2416_pm_prepare(void) * correct address to resume from. */ __raw_writel(0x2BED, S3C2412_INFORM0); - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1); } static int s3c2416_pm_add(struct device *dev, struct subsys_interface *sif) diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index 59d91b83b03d..945a9d1e1a71 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -304,7 +304,7 @@ static void s3c64xx_pm_prepare(void) wake_irqs, ARRAY_SIZE(wake_irqs)); /* store address of resume. 
*/ - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C64XX_INFORM0); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C64XX_INFORM0); /* ensure previous wakeup state is cleared before sleeping */ __raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT), S3C64XX_WAKEUP_STAT); diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c index 21b4b13c5ab7..2d5f08015e34 100644 --- a/arch/arm/mach-s5pv210/pm.c +++ b/arch/arm/mach-s5pv210/pm.c @@ -69,7 +69,7 @@ static void s5pv210_pm_prepare(void) __raw_writel(s5pv210_irqwake_intmask, S5P_WAKEUP_MASK); /* ensure at least INFORM0 has the resume address */ - __raw_writel(virt_to_phys(s5pv210_cpu_resume), S5P_INFORM0); + __raw_writel(__pa_symbol(s5pv210_cpu_resume), S5P_INFORM0); tmp = __raw_readl(S5P_SLEEP_CFG); tmp &= ~(S5P_SLEEP_CFG_OSC_EN | S5P_SLEEP_CFG_USBOSC_EN); diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 34853d5dfda2..9a7079f565bd 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -73,7 +73,7 @@ static int sa11x0_pm_enter(suspend_state_t state) RCSR = RCSR_HWR | RCSR_SWR | RCSR_WDR | RCSR_SMR; /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); /* go zzz */ cpu_suspend(0, sa1100_finish_suspend); diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index 0c6bb458b7a4..71729b8d1900 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -171,7 +171,7 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit)) static void __init shmobile_smp_apmu_setup_boot(void) { /* install boot code shared by all CPUs */ - shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); + shmobile_boot_fn = __pa_symbol(shmobile_smp_boot); } void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, @@ -185,7 +185,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); + shmobile_smp_hook(cpu, __pa_symbol(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } @@ -301,7 +301,7 @@ int shmobile_smp_apmu_cpu_kill(unsigned int cpu) #if defined(CONFIG_SUSPEND) static int shmobile_smp_apmu_do_suspend(unsigned long cpu) { - shmobile_smp_hook(cpu, virt_to_phys(cpu_resume), 0); + shmobile_smp_hook(cpu, __pa_symbol(cpu_resume), 0); shmobile_smp_apmu_cpu_shutdown(cpu); cpu_do_idle(); /* WFI selects Core Standby */ return 1; diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c index d1ecaf37d142..f1a1efde4beb 100644 --- a/arch/arm/mach-shmobile/platsmp-scu.c +++ b/arch/arm/mach-shmobile/platsmp-scu.c @@ -24,7 +24,7 @@ static void __iomem *shmobile_scu_base; static int shmobile_scu_cpu_prepare(unsigned int cpu) { /* For this particular CPU register SCU SMP boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu), + shmobile_smp_hook(cpu, __pa_symbol(shmobile_boot_scu), shmobile_scu_base_phys); return 0; } @@ -33,7 +33,7 @@ void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys, unsigned int max_cpus) { /* install boot code shared by all CPUs */ - shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); + shmobile_boot_fn = __pa_symbol(shmobile_smp_boot); /* enable SCU and cache coherency on booting CPU */ shmobile_scu_base_phys = scu_base_phys; diff --git a/arch/arm/mach-socfpga/platsmp.c 
b/arch/arm/mach-socfpga/platsmp.c index 07945748b571..0ee76772b507 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); @@ -63,7 +63,7 @@ static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle SOCFPGA_A10_RSTMGR_MODMPURST); memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); flush_cache_all(); diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 8d1e2d551786..39038a03836a 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -117,7 +117,7 @@ static void __init spear13xx_smp_prepare_cpus(unsigned int max_cpus) * (presently it is in SRAM). The BootMonitor waits until it receives a * soft interrupt, and then the secondary CPU branches to this address. */ - __raw_writel(virt_to_phys(spear13xx_secondary_startup), SYS_LOCATION); + __raw_writel(__pa_symbol(spear13xx_secondary_startup), SYS_LOCATION); } const struct smp_operations spear13xx_smp_ops __initconst = { diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index ea5a2277ee46..231f19e17436 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -103,7 +103,7 @@ static void __init sti_smp_prepare_cpus(unsigned int max_cpus) u32 __iomem *cpu_strt_ptr; u32 release_phys; int cpu; - unsigned long entry_pa = virt_to_phys(sti_secondary_startup); + unsigned long entry_pa = __pa_symbol(sti_secondary_startup); np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-scu"); diff --git a/arch/arm/mach-sunxi/platsmp.c b/arch/arm/mach-sunxi/platsmp.c index 6642267812c9..8fb5088464db 100644 --- a/arch/arm/mach-sunxi/platsmp.c +++ b/arch/arm/mach-sunxi/platsmp.c @@ -80,7 +80,7 @@ static int sun6i_smp_boot_secondary(unsigned int cpu, spin_lock(&cpu_lock); /* Set CPU boot address */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), cpucfg_membase + CPUCFG_PRIVATE0_REG); /* Assert the CPU core in reset */ @@ -162,7 +162,7 @@ static int sun8i_smp_boot_secondary(unsigned int cpu, spin_lock(&cpu_lock); /* Set CPU boot address */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), cpucfg_membase + CPUCFG_PRIVATE0_REG); /* Assert the CPU core in reset */ diff --git a/arch/arm/mach-tango/platsmp.c b/arch/arm/mach-tango/platsmp.c index 98c62a4a8623..2f0c6c050fed 100644 --- a/arch/arm/mach-tango/platsmp.c +++ b/arch/arm/mach-tango/platsmp.c @@ -5,7 +5,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle) { - tango_set_aux_boot_addr(virt_to_phys(secondary_startup)); + tango_set_aux_boot_addr(__pa_symbol(secondary_startup)); tango_start_aux_core(cpu); return 0; } diff --git a/arch/arm/mach-tango/pm.c b/arch/arm/mach-tango/pm.c index b05c6d6f99d0..406c0814eb6e 100644 --- a/arch/arm/mach-tango/pm.c +++ b/arch/arm/mach-tango/pm.c @@ -5,7 +5,7 @@ static int tango_pm_powerdown(unsigned long arg) { - tango_suspend(virt_to_phys(cpu_resume)); + tango_suspend(__pa_symbol(cpu_resume)); return -EIO; /* tango_suspend has failed */ } diff --git 
a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 6fd9db54887e..dc558892753c 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,14 +94,14 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)secondary_startup); + __pa_symbol((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP1] = TEGRA_IRAM_LPx_RESUME_AREA; __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP2] = - virt_to_phys((void *)tegra_resume); + __pa_symbol((void *)tegra_resume); #endif tegra_cpu_reset_handler_enable(); diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 8f2f615ff958..8c8f26389067 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -54,7 +54,7 @@ static void wakeup_secondary(void) * backup ram register at offset 0x1FF0, which is what boot rom code * is waiting for. This will wake up the secondary core from WFE. */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), backupram + UX500_CPU1_JUMPADDR_OFFSET); writel(0xA1FEED01, backupram + UX500_CPU1_WAKEMAGIC_OFFSET); diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 5cedcf572104..ee2a0faafaa1 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -166,7 +166,7 @@ static int __init dcscb_init(void) * Future entries into the kernel can now go * through the cluster entry vectors. */ - vexpress_flags_set(virt_to_phys(mcpm_entry_point)); + vexpress_flags_set(__pa_symbol(mcpm_entry_point)); return 0; } diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index 98e29dee91e8..742499bac6d0 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -79,7 +79,7 @@ static void __init vexpress_smp_dt_prepare_cpus(unsigned int max_cpus) * until it receives a soft interrupt, and then the * secondary CPU branches to this address. 
*/ - vexpress_flags_set(virt_to_phys(versatile_secondary_startup)); + vexpress_flags_set(__pa_symbol(versatile_secondary_startup)); } const struct smp_operations vexpress_smp_dt_ops __initconst = { diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 1aa4ccece69f..9b5f3c427086 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -54,7 +54,7 @@ static int tc2_pm_cpu_powerup(unsigned int cpu, unsigned int cluster) if (cluster >= TC2_CLUSTERS || cpu >= tc2_nr_cpus[cluster]) return -EINVAL; ve_spc_set_resume_addr(cluster, cpu, - virt_to_phys(mcpm_entry_point)); + __pa_symbol(mcpm_entry_point)); ve_spc_cpu_wakeup_irq(cluster, cpu, true); return 0; } @@ -159,7 +159,7 @@ static int tc2_pm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) static void tc2_pm_cpu_suspend_prepare(unsigned int cpu, unsigned int cluster) { - ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point)); + ve_spc_set_resume_addr(cluster, cpu, __pa_symbol(mcpm_entry_point)); } static void tc2_pm_cpu_is_up(unsigned int cpu, unsigned int cluster) diff --git a/arch/arm/mach-zx/platsmp.c b/arch/arm/mach-zx/platsmp.c index 0297f92084e0..afb9a82dedc3 100644 --- a/arch/arm/mach-zx/platsmp.c +++ b/arch/arm/mach-zx/platsmp.c @@ -76,7 +76,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus) * until it receives a soft interrupt, and then the * secondary CPU branches to this address. */ - __raw_writel(virt_to_phys(zx_secondary_startup), + __raw_writel(__pa_symbol(zx_secondary_startup), aonsysctrl_base + AON_SYS_CTRL_RESERVED1); iounmap(aonsysctrl_base); @@ -94,7 +94,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus) /* Map the first 4 KB IRAM for suspend usage */ sys_iram = __arm_ioremap_exec(ZX_IRAM_BASE, PAGE_SIZE, false); - zx_secondary_startup_pa = virt_to_phys(zx_secondary_startup); + zx_secondary_startup_pa = __pa_symbol(zx_secondary_startup); fncpy(sys_iram, &zx_resume_jump, zx_suspend_iram_sz); } diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index 7cd9865bdeb7..caa6d5fe9078 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL(zynq_cpun_start); static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); + return zynq_cpun_start(__pa_symbol(secondary_startup), cpu); } /* From 035e787543de709f29b38752251d4724200ec353 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 19 Jan 2017 01:26:28 +0100 Subject: [PATCH 45/57] ARM: 8644/1: Reduce "CPU: shutdown" message to debug level Similar to c68b0274fb3c ("ARM: reduce "Booted secondary processor" message to debug level"), demote the "CPU: shutdown" pr_notice() into a pr_debug(). 
Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8395e6..46377c40d056 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -251,7 +251,7 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } - pr_notice("CPU%u: shutdown\n", cpu); + pr_debug("CPU%u: shutdown\n", cpu); /* * platform_cpu_kill() is generally expected to do the powering off From d2ca5f2491c1246adf3847101fdc538a3b89439c Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sun, 29 Jan 2017 17:31:32 +0100 Subject: [PATCH 46/57] ARM: 8646/1: mmu: decouple VECTORS_BASE from Kconfig For MMU configurations, VECTORS_BASE is always 0xffff0000, a macro definition will suffice. For no-MMU, exception base address is dynamically determined in subsequent patches. To preserve bisectability, now make the macro applicable for no-MMU scenario too. Thanks to 0-DAY kernel test infrastructure that found the bisectability issue. This macro will be restricted to MMU case upon dynamically determining exception base address for no-MMU. Once exception address is handled dynamically for no-MMU, VECTORS_BASE can be removed from Kconfig. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 ++ arch/arm/mach-berlin/platsmp.c | 3 ++- arch/arm/mm/dump.c | 5 +++-- arch/arm/mm/init.c | 4 ++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index c30d0d82a105..00bd3529854a 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -111,6 +111,8 @@ #endif /* !CONFIG_MMU */ +#define VECTORS_BASE UL(0xffff0000) + #ifdef CONFIG_XIP_KERNEL #define KERNEL_START _sdata #else diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 1167b0ed92c8..7586b7aec272 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -75,7 +76,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) if (!cpu_ctrl) goto unmap_scu; - vectors_base = ioremap(CONFIG_VECTORS_BASE, SZ_32K); + vectors_base = ioremap(VECTORS_BASE, SZ_32K); if (!vectors_base) goto unmap_scu; diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 9fe8e241335c..21192d6eda40 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -18,6 +18,7 @@ #include #include +#include #include struct addr_marker { @@ -31,8 +32,8 @@ static struct addr_marker address_markers[] = { { 0, "vmalloc() Area" }, { VMALLOC_END, "vmalloc() End" }, { FIXADDR_START, "Fixmap Area" }, - { CONFIG_VECTORS_BASE, "Vectors" }, - { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { VECTORS_BASE, "Vectors" }, + { VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, { -1, NULL }, }; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4127f578086c..50e5402a8ef3 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -518,8 +519,7 @@ void __init mem_init(void) " .data : 0x%p" " - 0x%p" " (%4td kB)\n" " .bss : 0x%p" " - 0x%p" " (%4td kB)\n", - MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + - (PAGE_SIZE)), + MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE), #ifdef CONFIG_HAVE_TCM MLK(DTCM_OFFSET, (unsigned long) dtcm_end), MLK(ITCM_OFFSET, (unsigned long) 
itcm_end), From f8300a0b5de08c09db105db3c34a2a1c618e147e Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:46:36 +0100 Subject: [PATCH 47/57] ARM: 8647/2: nommu: dynamic exception base address setting No-MMU dynamic exception base address configuration on CP15 processors. In the case of low vectors, decision based on whether security extensions are enabled & whether remap vectors to RAM CONFIG option is selected. For no-MMU without CP15, current default value of 0x0 is retained. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/nommu.c | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 13a25d6282f8..3b5c7aaf9c76 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -22,6 +23,8 @@ #include "mm.h" +unsigned long vectors_base; + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; @@ -278,15 +281,60 @@ static void adjust_lowmem_bounds_mpu(void) {} static void __init mpu_setup(void) {} #endif /* CONFIG_ARM_MPU */ +#ifdef CONFIG_CPU_CP15 +#ifdef CONFIG_CPU_HIGH_VECTOR +static unsigned long __init setup_vectors_base(void) +{ + unsigned long reg = get_cr(); + + set_cr(reg | CR_V); + return 0xffff0000; +} +#else /* CONFIG_CPU_HIGH_VECTOR */ +/* Write exception base address to VBAR */ +static inline void set_vbar(unsigned long val) +{ + asm("mcr p15, 0, %0, c12, c0, 0" : : "r" (val) : "cc"); +} + +/* + * Security extensions, bits[7:4], permitted values, + * 0b0000 - not implemented, 0b0001/0b0010 - implemented + */ +static inline bool security_extensions_enabled(void) +{ + return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4); +} + +static unsigned long __init setup_vectors_base(void) +{ + unsigned long base = 0, reg = get_cr(); + + set_cr(reg & ~CR_V); + if (security_extensions_enabled()) { + if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) + base = CONFIG_DRAM_BASE; + set_vbar(base); + } else if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) { + if (CONFIG_DRAM_BASE != 0) + pr_err("Security extensions not enabled, vectors cannot be remapped to RAM, vectors base will be 0x00000000\n"); + } + + return base; +} +#endif /* CONFIG_CPU_HIGH_VECTOR */ +#endif /* CONFIG_CPU_CP15 */ + void __init arm_mm_memblock_reserve(void) { #ifndef CONFIG_CPU_V7M + vectors_base = IS_ENABLED(CONFIG_CPU_CP15) ? setup_vectors_base() : 0; /* * Register the exception vector page. * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ - memblock_reserve(CONFIG_VECTORS_BASE, 2 * PAGE_SIZE); + memblock_reserve(vectors_base, 2 * PAGE_SIZE); #else /* ifndef CONFIG_CPU_V7M */ /* * There is no dedicated vector page on V7-M. So nothing needs to be @@ -310,7 +358,7 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { - early_trap_init((void *)CONFIG_VECTORS_BASE); + early_trap_init((void *)vectors_base); mpu_setup(); bootmem_init(); } From 58c16709f9cad7e6daabeaa5c94ac4dcb260aedd Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:39:18 +0100 Subject: [PATCH 48/57] ARM: 8648/2: nommu: display vectors base VECTORS_BASE displays the exception base address. Now on no-MMU as the exception base address is dynamically estimated, define VECTORS_BASE to the variable holding it. 
As it is the case, limit VECTORS_BASE constant definition to MMU. Suggested-by: Russell King Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 00bd3529854a..1f54e4e98c1e 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -83,8 +83,15 @@ #define IOREMAP_MAX_ORDER 24 #endif +#define VECTORS_BASE UL(0xffff0000) + #else /* CONFIG_MMU */ +#ifndef __ASSEMBLY__ +extern unsigned long vectors_base; +#define VECTORS_BASE vectors_base +#endif + /* * The limitation of user task size can grow up to the end of free ram region. * It is difficult to define and perhaps will never meet the original meaning @@ -111,8 +118,6 @@ #endif /* !CONFIG_MMU */ -#define VECTORS_BASE UL(0xffff0000) - #ifdef CONFIG_XIP_KERNEL #define KERNEL_START _sdata #else From ad475117d2015781789364d599b85c67254680a1 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:47:34 +0100 Subject: [PATCH 49/57] ARM: 8649/2: nommu: remove Hivecs configuration is asm Now that exception based address is handled dynamically for processors with CP15, remove Hivecs configuration in assembly. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 6b4eb27b8758..2e21e08de747 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -151,11 +151,6 @@ __after_proc_init: #endif #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I -#endif -#ifdef CONFIG_CPU_HIGH_VECTOR - orr r0, r0, #CR_V -#else - bic r0, r0, #CR_V #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #elif defined (CONFIG_CPU_V7M) From 050d18d1c65113b4558d86d53465ebe1d04910fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 30 Jan 2017 18:29:28 +0100 Subject: [PATCH 50/57] ARM: 8650/1: module: handle negative R_ARM_PREL31 addends correctly According to the spec 'ELF for the ARM Architecture' (IHI 0044E), addends for R_ARM_PREL31 relocations are 31-bit signed quantities, so we need to sign extend the value to 32 bits before it can be used as an offset in the calculation of the relocated value. We have not been bitten by this because these relocations are usually emitted against the start of a section, which means the addends never assume negative values in practice. But it is a bug nonetheless, so fix it. 
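The arithmetic is easy to check in isolation with a standalone userspace sketch (made-up addresses, plain C rather than the kernel helper): bits [30:0] of the instruction word are sign-extended to 32 bits, the symbol offset is added, the sum is range-checked against +/-2^30, and only then merged back under the preserved top bit.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t word = 0x7ffffffe;			/* addend field holds -2 */
	uint32_t sym = 0x08010000, loc = 0x08010100;	/* made-up addresses */
	int32_t offset;

	/*
	 * Sign-extend the 31-bit addend: shift bit 30 up into the sign bit,
	 * then arithmetic-shift back down (mirrors the kernel's s32 cast).
	 */
	offset = (int32_t)(word << 1) >> 1;
	offset += (int32_t)(sym - loc);

	/* a PREL31 result must still fit in 31 signed bits */
	if (offset >= 0x40000000 || offset < -0x40000000)
		return 1;

	word = (word & 0x80000000) | ((uint32_t)offset & 0x7fffffff);
	printf("offset=%d word=0x%08x\n", offset, word);	/* -258, 0x7ffffefe */
	return 0;
}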
Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/module.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 4f14b5ce6535..80254b47dc34 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -155,8 +155,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, break; case R_ARM_PREL31: - offset = *(u32 *)loc + sym->st_value - loc; - *(u32 *)loc = offset & 0x7fffffff; + offset = (*(s32 *)loc << 1) >> 1; /* sign extend */ + offset += sym->st_value - loc; + if (offset >= 0x40000000 || offset < -0x40000000) { + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); + return -ENOEXEC; + } + *(u32 *)loc &= 0x80000000; + *(u32 *)loc |= offset & 0x7fffffff; break; case R_ARM_MOVW_ABS_NC: From 1c63d4c5e16394698072d671029e753715af100a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Feb 2017 14:29:22 +0100 Subject: [PATCH 51/57] ARM: 8651/1: cache-uniphier: include instead of Nothing in this header file depends on . Rather, should be included for -ENODEV. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-uniphier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/hardware/cache-uniphier.h b/arch/arm/include/asm/hardware/cache-uniphier.h index eaa60da7dac3..0ef42ae75b6c 100644 --- a/arch/arm/include/asm/hardware/cache-uniphier.h +++ b/arch/arm/include/asm/hardware/cache-uniphier.h @@ -16,7 +16,7 @@ #ifndef __CACHE_UNIPHIER_H #define __CACHE_UNIPHIER_H -#include +#include #ifdef CONFIG_CACHE_UNIPHIER int uniphier_cache_init(void); From 06369a1e58bd5eb89c02f9a2cf1a35152cbf1154 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Feb 2017 14:30:09 +0100 Subject: [PATCH 52/57] ARM: 8652/1: cache-uniphier: clean up active way setup code Now, the active way setup function is called with a fixed value zero for the second argument. The code can be simpler. 
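One detail worth calling out below is the switch from the open-coded ((u32)1 << n) - 1 to GENMASK() for the way mask; a throwaway userspace sketch (with a local 32-bit stand-in for the kernel's GENMASK()) confirms the two forms are equivalent for the way counts the driver deals with:

#include <stdint.h>
#include <stdio.h>

/* 32-bit stand-in for the kernel's GENMASK(h, l) */
#define GENMASK32(h, l) \
	((~UINT32_C(0) >> (31 - (h))) & (~UINT32_C(0) << (l)))

int main(void)
{
	unsigned int nr_ways;	/* i.e. cache_size / nsets / line_size */

	for (nr_ways = 1; nr_ways <= 16; nr_ways++) {
		uint32_t old_mask = ((uint32_t)1 << nr_ways) - 1;
		uint32_t new_mask = GENMASK32(nr_ways - 1, 0);

		if (old_mask != new_mask)
			return 1;
	}
	printf("e.g. 8 ways -> 0x%02x\n", GENMASK32(7, 0));	/* prints 0xff */
	return 0;
}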
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/mm/cache-uniphier.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c index dfe97b409916..f57b080b6fd4 100644 --- a/arch/arm/mm/cache-uniphier.c +++ b/arch/arm/mm/cache-uniphier.c @@ -15,6 +15,7 @@ #define pr_fmt(fmt) "uniphier: " fmt +#include #include #include #include @@ -71,8 +72,7 @@ * @ctrl_base: virtual base address of control registers * @rev_base: virtual base address of revision registers * @op_base: virtual base address of operation registers - * @way_present_mask: each bit specifies if the way is present - * @way_locked_mask: each bit specifies if the way is locked + * @way_mask: each bit specifies if the way is present * @nsets: number of associativity sets * @line_size: line size in bytes * @range_op_max_size: max size that can be handled by a single range operation @@ -83,8 +83,7 @@ struct uniphier_cache_data { void __iomem *rev_base; void __iomem *op_base; void __iomem *way_ctrl_base; - u32 way_present_mask; - u32 way_locked_mask; + u32 way_mask; u32 nsets; u32 line_size; u32 range_op_max_size; @@ -234,17 +233,13 @@ static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on) writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC); } -static void __init __uniphier_cache_set_locked_ways( - struct uniphier_cache_data *data, - u32 way_mask) +static void __init __uniphier_cache_set_active_ways( + struct uniphier_cache_data *data) { unsigned int cpu; - data->way_locked_mask = way_mask & data->way_present_mask; - for_each_possible_cpu(cpu) - writel_relaxed(~data->way_locked_mask & data->way_present_mask, - data->way_ctrl_base + 4 * cpu); + writel_relaxed(data->way_mask, data->way_ctrl_base + 4 * cpu); } static void uniphier_cache_maint_range(unsigned long start, unsigned long end, @@ -307,7 +302,7 @@ static void __init uniphier_cache_enable(void) list_for_each_entry(data, &uniphier_cache_list, list) { __uniphier_cache_enable(data, true); - __uniphier_cache_set_locked_ways(data, 0); + __uniphier_cache_set_active_ways(data); } } @@ -382,8 +377,8 @@ static int __init __uniphier_cache_init(struct device_node *np, goto err; } - data->way_present_mask = - ((u32)1 << cache_size / data->nsets / data->line_size) - 1; + data->way_mask = GENMASK(cache_size / data->nsets / data->line_size - 1, + 0); data->ctrl_base = of_iomap(np, 0); if (!data->ctrl_base) { From 7b96ddd02ec33d705df02260e2d5dcfb1d498f4b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 13:29:36 +0100 Subject: [PATCH 53/57] ARM: 8654/1: decompressor: add strlen prototype The decompress.c file contains a declaration for strstr() so we can include some compression library code. With the updated LZ4 implementation, we run into the same problem again for strlen(): In file included from ../include/linux/rcupdate.h:40:0, from ../include/linux/srcu.h:33, from ../include/linux/notifier.h:15, from ../include/linux/memory_hotplug.h:6, from ../include/linux/mmzone.h:749, from ../include/linux/gfp.h:5, from ../include/linux/kmod.h:22, from ../include/linux/module.h:13, from ../arch/arm/boot/compressed/../../../../lib/lz4/lz4_decompress.c:39, from ../arch/arm/boot/compressed/../../../../lib/decompress_unlz4.c:13, from ../arch/arm/boot/compressed/decompress.c:55: include/linux/cpumask.h: In function 'cpumask_parse': include/linux/cpumask.h:592:53: error: implicit declaration of function 'strlen';did you mean 'strstr'? 
[-Werror=implicit-function-declaration] This adds another declaration to work around the new problem. Fixes: ce83d9ab80d6 ("lib: update LZ4 compressor module") Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/boot/compressed/decompress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index a0765e7ed6c7..ea7832702a8f 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -32,6 +32,7 @@ extern void error(char *); /* Not needed, but used in some headers pulled in by decompressors */ extern char * strstr(const char * s1, const char *s2); +extern size_t strlen(const char *s); #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" From e2fce0a28d8f4293d68abf6485956e7794a30124 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 13:30:40 +0100 Subject: [PATCH 54/57] ARM: 8655/1: improve NOMMU definition of pgprot_*() The tegra DRM driver produces a harmless warning when built for NOMMU: drivers/gpu/drm/tegra/gem.c: In function 'tegra_drm_mmap': drivers/gpu/drm/tegra/gem.c:508:12: unused variable 'prot' This is because pgprot_writecombine() on ARM returns a constant and ignores its argument. The version in asm-generic doesn't have that problem, so let's use that one instead. We don't actually care about the value on NOMMU, and this is consistent with what some other architectures do. Signed-off-by: Arnd Bergmann Acked-by: Thierry Reding Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-nommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index add094d09e3e..302240c19a5a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -63,9 +63,9 @@ typedef pte_t *pte_addr_t; /* * Mark the prot value as uncacheable and unbufferable. */ -#define pgprot_noncached(prot) __pgprot(0) -#define pgprot_writecombine(prot) __pgprot(0) -#define pgprot_dmacoherent(prot) __pgprot(0) +#define pgprot_noncached(prot) (prot) +#define pgprot_writecombine(prot) (prot) +#define pgprot_dmacoherent(prot) (prot) /* From 3928624812dcfa39b6a67f9de46efcb51c573ad0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:11:10 +0000 Subject: [PATCH 55/57] ARM: mm: move initrd init code out of arm_memblock_init() Move the ARM initrd initialisation code out of arm_memblock_init() into its own function, so it can be cleaned up. Signed-off-by: Russell King --- arch/arm/mm/init.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 50e5402a8ef3..43d8825e59bb 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -228,11 +228,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) return phys; } -void __init arm_memblock_init(const struct machine_desc *mdesc) +static void __init arm_initrd_init(void) { - /* Register the kernel text, kernel data and initrd with memblock. 
*/ - memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); - #ifdef CONFIG_BLK_DEV_INITRD /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { @@ -260,6 +257,14 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) initrd_end = initrd_start + phys_initrd_size; } #endif +} + +void __init arm_memblock_init(const struct machine_desc *mdesc) +{ + /* Register the kernel text, kernel data and initrd with memblock. */ + memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); + + arm_initrd_init(); arm_mm_memblock_reserve(); From 68b32f361f3892fc376051b1702954b2dc692d13 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:13:25 +0000 Subject: [PATCH 56/57] ARM: mm: clean up initrd initialisation Rather than repeatedly testing phys_initrd_size to see if the initrd is still enabled, return from the new function to avoid executing the remaining initialisation. Signed-off-by: Russell King --- arch/arm/mm/init.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 43d8825e59bb..15739a95552a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -236,26 +236,29 @@ static void __init arm_initrd_init(void) phys_initrd_start = __virt_to_phys(initrd_start); phys_initrd_size = initrd_end - initrd_start; } + initrd_start = initrd_end = 0; - if (phys_initrd_size && - !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + + if (!phys_initrd_size) + return; + + if (!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", (u64)phys_initrd_start, phys_initrd_size); - phys_initrd_start = phys_initrd_size = 0; + return; } - if (phys_initrd_size && - memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + + if (memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", (u64)phys_initrd_start, phys_initrd_size); - phys_initrd_start = phys_initrd_size = 0; + return; } - if (phys_initrd_size) { - memblock_reserve(phys_initrd_start, phys_initrd_size); - /* Now convert initrd to virtual addresses */ - initrd_start = __phys_to_virt(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; - } + memblock_reserve(phys_initrd_start, phys_initrd_size); + + /* Now convert initrd to virtual addresses */ + initrd_start = __phys_to_virt(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; #endif } From cdcc5fa0415d943288c6316bd32e76befb1027c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:21:05 +0000 Subject: [PATCH 57/57] ARM: mm: round the initrd reservation to page boundaries Round the initrd memblock reservation to page boundaries to prevent other data sharing the initrd pages. This prevents an allocation possibly overlapping with the initrd, which would later get trampled on in free_initrd_mem(). 
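For illustration, the rounding introduced below covers every page the initrd touches (a sketch of the arithmetic only; start and size mirror the local variables added by the patch):

	phys_addr_t start = round_down(phys_initrd_start, PAGE_SIZE);
	unsigned long size = phys_initrd_size + (phys_initrd_start - start);

	size = round_up(size, PAGE_SIZE);

With 4 KiB pages, an initrd at 0x00801200 of size 0x1000 is reserved as the two full pages from 0x00801000 to 0x00803000, so the partially used head and tail pages cannot be handed out to other allocations.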
Signed-off-by: Russell King --- arch/arm/mm/init.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 15739a95552a..d1e26610977d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -231,6 +231,9 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) static void __init arm_initrd_init(void) { #ifdef CONFIG_BLK_DEV_INITRD + phys_addr_t start; + unsigned long size; + /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { phys_initrd_start = __virt_to_phys(initrd_start); @@ -242,19 +245,29 @@ static void __init arm_initrd_init(void) if (!phys_initrd_size) return; - if (!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + /* + * Round the memory region to page boundaries as per free_initrd_mem() + * This allows us to detect whether the pages overlapping the initrd + * are in use, but more importantly, reserves the entire set of pages + * as we don't want these pages allocated for other purposes. + */ + start = round_down(phys_initrd_start, PAGE_SIZE); + size = phys_initrd_size + (phys_initrd_start - start); + size = round_up(size, PAGE_SIZE); + + if (!memblock_is_region_memory(start, size)) { pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", - (u64)phys_initrd_start, phys_initrd_size); + (u64)start, size); return; } - if (memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + if (memblock_is_region_reserved(start, size)) { pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", - (u64)phys_initrd_start, phys_initrd_size); + (u64)start, size); return; } - memblock_reserve(phys_initrd_start, phys_initrd_size); + memblock_reserve(start, size); /* Now convert initrd to virtual addresses */ initrd_start = __phys_to_virt(phys_initrd_start);