2006-01-02 19:04:38 +01:00
|
|
|
/*
|
|
|
|
* net/tipc/link.h: Include file for TIPC link code
|
2007-02-09 15:25:21 +01:00
|
|
|
*
|
2014-08-23 00:09:07 +02:00
|
|
|
* Copyright (c) 1995-2006, 2013-2014, Ericsson AB
|
2011-01-07 17:43:40 +01:00
|
|
|
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
|
2006-01-02 19:04:38 +01:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 13:30:43 +01:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-02 19:04:38 +01:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 13:30:43 +01:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-02 19:04:38 +01:00
|
|
|
*
|
2006-01-11 13:30:43 +01:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-02 19:04:38 +01:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_LINK_H
|
|
|
|
#define _TIPC_LINK_H
|
|
|
|
|
2014-11-20 10:29:07 +01:00
|
|
|
#include <net/genetlink.h>
|
2006-01-02 19:04:38 +01:00
|
|
|
#include "msg.h"
|
|
|
|
#include "node.h"
|
|
|
|
|
2015-01-09 08:27:01 +01:00
|
|
|
/* TIPC-specific error codes
 */
#define ELINKCONG EAGAIN	/* link congestion <=> resource unavailable */

/* Out-of-range value for link sequence numbers
 * (0x10000 is one above 0xffff, i.e. it does not fit in 16 bits)
 */
#define INVALID_LINK_SEQ 0x10000

/* Link working states
 */
#define WORKING_WORKING 560810u
#define WORKING_UNKNOWN 560811u
#define RESET_UNKNOWN   560812u
#define RESET_RESET     560813u

/* Link endpoint execution states
 * (each value is a distinct single bit, so several may be set at once)
 */
#define LINK_STARTED     0x0001
#define LINK_STOPPED     0x0002
#define LINK_SYNCHING    0x0004
#define LINK_FAILINGOVER 0x0008

/* Starting value for maximum packet size negotiation on unicast links
 * (unless bearer MTU is less)
 */
#define MAX_PKT_DEFAULT 1500
|
|
|
|
|
2012-07-11 15:40:43 +02:00
|
|
|
struct tipc_stats {
|
|
|
|
u32 sent_info; /* used in counting # sent packets */
|
|
|
|
u32 recv_info; /* used in counting # recv'd packets */
|
|
|
|
u32 sent_states;
|
|
|
|
u32 recv_states;
|
|
|
|
u32 sent_probes;
|
|
|
|
u32 recv_probes;
|
|
|
|
u32 sent_nacks;
|
|
|
|
u32 recv_nacks;
|
|
|
|
u32 sent_acks;
|
|
|
|
u32 sent_bundled;
|
|
|
|
u32 sent_bundles;
|
|
|
|
u32 recv_bundled;
|
|
|
|
u32 recv_bundles;
|
|
|
|
u32 retransmitted;
|
|
|
|
u32 sent_fragmented;
|
|
|
|
u32 sent_fragments;
|
|
|
|
u32 recv_fragmented;
|
|
|
|
u32 recv_fragments;
|
|
|
|
u32 link_congs; /* # port sends blocked by congestion */
|
|
|
|
u32 deferred_recv;
|
|
|
|
u32 duplicates;
|
|
|
|
u32 max_queue_sz; /* send queue size high water mark */
|
|
|
|
u32 accu_queue_sz; /* used for send queue size profiling */
|
|
|
|
u32 queue_sz_counts; /* used for send queue size profiling */
|
|
|
|
u32 msg_length_counts; /* used for message length profiling */
|
|
|
|
u32 msg_lengths_total; /* used for message length profiling */
|
|
|
|
u32 msg_length_profile[7]; /* used for msg. length profiling */
|
|
|
|
};
|
|
|
|
|
2006-01-02 19:04:38 +01:00
|
|
|
/**
|
2011-12-30 02:58:42 +01:00
|
|
|
* struct tipc_link - TIPC link data structure
|
2006-01-02 19:04:38 +01:00
|
|
|
* @addr: network address of link's peer node
|
|
|
|
* @name: link name character string
|
|
|
|
* @media_addr: media address to use when sending messages over link
|
|
|
|
* @timer: link timer
|
|
|
|
* @owner: pointer to peer node
|
tipc: add reference count to struct tipc_link
When a bearer is disabled, all pertaining links will be reset and
deleted. However, if there is a second active link towards a killed
link's destination, the delete has to be postponed until the failover
is finished. During this interval, we currently put the link in zombie
mode, i.e., we take it out of traffic, delete its timer, but leave it
attached to the owner node structure until all missing packets have
been received. When this is done, we detach the link from its node
and delete it, assuming that the synchronous timer deletion that was
initiated earlier in a different thread has finished.
This is unsafe, as the failover may finish before del_timer_sync()
has returned in the other thread.
We fix this by adding an atomic reference counter of type kref in
struct tipc_link. The counter keeps track of the references kept
to the link by the owner node and the timer. We then do a conditional
delete, based on the reference counter, both after the failover has
been finished and when the timer expires, if applicable. Whoever
comes last, will actually delete the link. This approach also implies
that we can make the deletion of the timer asynchronous.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-03 14:59:17 +01:00
|
|
|
* @refcnt: reference counter for permanent references (owner node & timer)
|
2014-02-13 23:29:08 +01:00
|
|
|
* @flags: execution state flags for link endpoint instance
|
2006-01-02 19:04:38 +01:00
|
|
|
* @checkpoint: reference point for triggering link continuity checking
|
|
|
|
* @peer_session: link session # being used by peer end of link
|
|
|
|
* @peer_bearer_id: bearer id used by link's peer endpoint
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 04:55:46 +02:00
|
|
|
* @bearer_id: local bearer id used by link
|
2007-02-09 15:25:21 +01:00
|
|
|
* @tolerance: minimum link continuity loss needed to reset link [in ms]
|
2015-01-09 08:27:00 +01:00
|
|
|
* @cont_intv: link continuity testing interval
|
2006-01-02 19:04:38 +01:00
|
|
|
* @abort_limit: # of unacknowledged continuity probes needed to reset link
|
|
|
|
* @state: current state of link FSM
|
|
|
|
* @fsm_msg_cnt: # of protocol messages link FSM has sent in current state
|
|
|
|
* @proto_msg: template for control messages generated by link
|
|
|
|
* @pmsg: convenience pointer to "proto_msg" field
|
|
|
|
* @priority: current link priority
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 04:55:46 +02:00
|
|
|
* @net_plane: current link network plane ('A' through 'H')
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-25 17:07:24 +01:00
|
|
|
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
|
2006-01-02 19:04:38 +01:00
|
|
|
* @exp_msg_count: # of tunnelled messages expected during link changeover
|
|
|
|
* @reset_checkpoint: seq # of last acknowledged message at time of link reset
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 15:33:02 +02:00
|
|
|
* @mtu: current maximum packet size for this link
|
|
|
|
* @advertised_mtu: advertised own mtu when link is being established
|
2015-03-13 21:08:10 +01:00
|
|
|
* @transmitq: queue for sent, non-acked messages
|
|
|
|
* @backlogq: queue for messages waiting to be sent
|
2006-01-02 19:04:38 +01:00
|
|
|
* @next_out_no: next sequence number to use for outbound messages
|
|
|
|
* @last_retransmitted: sequence number of most recently retransmitted message
|
|
|
|
* @stale_count: # of identical retransmit requests made by peer
|
|
|
|
* @next_in_no: next sequence number to expect for inbound messages
|
2014-11-26 04:41:53 +01:00
|
|
|
* @deferred_queue: deferred queue saved OOS b'cast message received from node
|
2006-01-02 19:04:38 +01:00
|
|
|
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 14:36:41 +01:00
|
|
|
* @inputq: buffer queue for messages to be delivered upwards
|
|
|
|
* @namedq: buffer queue for name table messages to be delivered upwards
|
2006-01-02 19:04:38 +01:00
|
|
|
* @next_out: ptr to first unsent outbound message in queue
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 14:36:41 +01:00
|
|
|
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
|
2006-01-02 19:04:38 +01:00
|
|
|
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
|
2014-05-14 11:39:12 +02:00
|
|
|
* @reasm_buf: head of partially reassembled inbound message fragments
|
2006-01-02 19:04:38 +01:00
|
|
|
* @stats: collects statistics regarding link activity
|
|
|
|
*/
|
2011-12-30 02:58:42 +01:00
|
|
|
struct tipc_link {
|
2006-01-02 19:04:38 +01:00
|
|
|
u32 addr;
|
|
|
|
char name[TIPC_MAX_LINK_NAME];
|
|
|
|
struct tipc_media_addr media_addr;
|
|
|
|
struct timer_list timer;
|
2008-09-03 08:38:32 +02:00
|
|
|
struct tipc_node *owner;
|
tipc: add reference count to struct tipc_link
When a bearer is disabled, all pertaining links will be reset and
deleted. However, if there is a second active link towards a killed
link's destination, the delete has to be postponed until the failover
is finished. During this interval, we currently put the link in zombie
mode, i.e., we take it out of traffic, delete its timer, but leave it
attached to the owner node structure until all missing packets have
been received. When this is done, we detach the link from its node
and delete it, assuming that the synchronous timer deletion that was
initiated earlier in a different thread has finished.
This is unsafe, as the failover may finish before del_timer_sync()
has returned in the other thread.
We fix this by adding an atomic reference counter of type kref in
struct tipc_link. The counter keeps track of the references kept
to the link by the owner node and the timer. We then do a conditional
delete, based on the reference counter, both after the failover has
been finished and when the timer expires, if applicable. Whoever
comes last, will actually delete the link. This approach also implies
that we can make the deletion of the timer asynchronous.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-03 14:59:17 +01:00
|
|
|
struct kref ref;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
|
|
|
/* Management and link supervision data */
|
2014-02-13 23:29:08 +01:00
|
|
|
unsigned int flags;
|
2006-01-02 19:04:38 +01:00
|
|
|
u32 checkpoint;
|
|
|
|
u32 peer_session;
|
|
|
|
u32 peer_bearer_id;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 04:55:46 +02:00
|
|
|
u32 bearer_id;
|
2006-01-02 19:04:38 +01:00
|
|
|
u32 tolerance;
|
2015-01-09 08:27:00 +01:00
|
|
|
unsigned long cont_intv;
|
2006-01-02 19:04:38 +01:00
|
|
|
u32 abort_limit;
|
|
|
|
int state;
|
|
|
|
u32 fsm_msg_cnt;
|
|
|
|
struct {
|
|
|
|
unchar hdr[INT_H_SIZE];
|
|
|
|
unchar body[TIPC_MAX_IF_NAME];
|
|
|
|
} proto_msg;
|
|
|
|
struct tipc_msg *pmsg;
|
|
|
|
u32 priority;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 04:55:46 +02:00
|
|
|
char net_plane;
|
tipc: eliminate delayed link deletion at link failover
When a bearer is disabled manually, all its links have to be reset
and deleted. However, if there is a remaining, parallel link ready
to take over a deleted link's traffic, we currently delay the delete
of the removed link until the failover procedure is finished. This
is because the remaining link needs to access state from the reset
link, such as the last received packet number, and any partially
reassembled buffer, in order to perform a successful failover.
In this commit, we do instead move the state data over to the new
link, so that it can fulfill the procedure autonomously, without
accessing any data on the old link. This means that we can now
proceed and delete all pertaining links immediately when a bearer
is disabled. This saves us from some unnecessary complexity in such
situations.
We also choose to change the confusing definitions CHANGEOVER_PROTOCOL,
ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL,
FAILOVER_MSG and SYNCH_MSG respectively.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 15:33:01 +02:00
|
|
|
u16 synch_point;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
tipc: eliminate delayed link deletion at link failover
When a bearer is disabled manually, all its links have to be reset
and deleted. However, if there is a remaining, parallel link ready
to take over a deleted link's traffic, we currently delay the delete
of the removed link until the failover procedure is finished. This
is because the remaining link needs to access state from the reset
link, such as the last received packet number, and any partially
reassembled buffer, in order to perform a successful failover.
In this commit, we do instead move the state data over to the new
link, so that it can fulfill the procedure autonomously, without
accessing any data on the old link. This means that we can now
proceed and delete all pertaining links immediately when a bearer
is disabled. This saves us from some unnecessary complexity in such
situations.
We also choose to change the confusing definitions CHANGEOVER_PROTOCOL,
ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL,
FAILOVER_MSG and SYNCH_MSG respectively.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 15:33:01 +02:00
|
|
|
/* Failover */
|
|
|
|
u16 failover_pkts;
|
|
|
|
u16 failover_checkpt;
|
|
|
|
struct sk_buff *failover_skb;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
2007-02-09 15:25:21 +01:00
|
|
|
/* Max packet negotiation */
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 15:33:02 +02:00
|
|
|
u16 mtu;
|
|
|
|
u16 advertised_mtu;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
|
|
|
/* Sending */
|
2015-03-13 21:08:10 +01:00
|
|
|
struct sk_buff_head transmq;
|
|
|
|
struct sk_buff_head backlogq;
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-25 17:07:24 +01:00
|
|
|
struct {
|
|
|
|
u16 len;
|
|
|
|
u16 limit;
|
|
|
|
} backlog[5];
|
2006-01-02 19:04:38 +01:00
|
|
|
u32 next_out_no;
|
2015-03-13 21:08:10 +01:00
|
|
|
u32 window;
|
2007-02-09 15:25:21 +01:00
|
|
|
u32 last_retransmitted;
|
|
|
|
u32 stale_count;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
|
|
|
/* Reception */
|
|
|
|
u32 next_in_no;
|
2015-03-13 21:08:10 +01:00
|
|
|
u32 rcv_unacked;
|
|
|
|
struct sk_buff_head deferdq;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 14:36:41 +01:00
|
|
|
struct sk_buff_head inputq;
|
|
|
|
struct sk_buff_head namedq;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
|
|
|
/* Congestion handling */
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 14:36:41 +01:00
|
|
|
struct sk_buff_head wakeupq;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 09:28:06 +01:00
|
|
|
/* Fragmentation/reassembly */
|
2014-05-14 11:39:12 +02:00
|
|
|
struct sk_buff *reasm_buf;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
2007-02-09 15:25:21 +01:00
|
|
|
/* Statistics */
|
2012-07-11 15:40:43 +02:00
|
|
|
struct tipc_stats stats;
|
2006-01-02 19:04:38 +01:00
|
|
|
};
|
|
|
|
|
2011-01-07 17:43:40 +01:00
|
|
|
struct tipc_port;
|
2006-01-02 19:04:38 +01:00
|
|
|
|
2011-12-30 02:58:42 +01:00
|
|
|
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
|
2011-02-28 17:32:27 +01:00
|
|
|
struct tipc_bearer *b_ptr,
|
2006-01-18 00:38:21 +01:00
|
|
|
const struct tipc_media_addr *media_addr);
|
2015-02-03 14:59:18 +01:00
|
|
|
void tipc_link_delete(struct tipc_link *link);
|
2015-01-09 08:27:05 +01:00
|
|
|
void tipc_link_delete_list(struct net *net, unsigned int bearer_id,
|
|
|
|
bool shutting_down);
|
2014-01-07 23:02:41 +01:00
|
|
|
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
|
2014-02-18 09:06:46 +01:00
|
|
|
void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
|
2011-12-30 02:58:42 +01:00
|
|
|
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
|
|
|
|
int tipc_link_is_up(struct tipc_link *l_ptr);
|
|
|
|
int tipc_link_is_active(struct tipc_link *l_ptr);
|
2014-01-07 23:02:44 +01:00
|
|
|
void tipc_link_purge_queues(struct tipc_link *l_ptr);
|
2014-05-05 02:56:17 +02:00
|
|
|
void tipc_link_reset_all(struct tipc_node *node);
|
2011-12-30 02:58:42 +01:00
|
|
|
void tipc_link_reset(struct tipc_link *l_ptr);
|
2015-01-09 08:27:05 +01:00
|
|
|
void tipc_link_reset_list(struct net *net, unsigned int bearer_id);
|
|
|
|
int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
|
|
|
|
u32 selector);
|
|
|
|
int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
|
|
|
|
u32 selector);
|
2015-01-09 08:27:06 +01:00
|
|
|
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
|
|
|
|
struct sk_buff_head *list);
|
2014-02-18 09:06:46 +01:00
|
|
|
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing, there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unnecessarily complex.
In this commit, we eliminate the MTU discovery algorithm, and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 15:33:02 +02:00
|
|
|
u32 gap, u32 tolerance, u32 priority);
|
2014-11-26 04:41:48 +01:00
|
|
|
void tipc_link_push_packets(struct tipc_link *l_ptr);
|
2014-11-26 04:41:53 +01:00
|
|
|
u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf);
|
2011-12-30 02:58:42 +01:00
|
|
|
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
|
|
|
|
void tipc_link_retransmit(struct tipc_link *l_ptr,
|
|
|
|
struct sk_buff *start, u32 retransmits);
|
2014-11-26 04:41:52 +01:00
|
|
|
struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list,
|
|
|
|
const struct sk_buff *skb);
|
2006-01-02 19:04:38 +01:00
|
|
|
|
2014-11-20 10:29:12 +01:00
|
|
|
int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb);
|
|
|
|
int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 10:29:13 +01:00
|
|
|
int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 10:29:14 +01:00
|
|
|
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 10:29:07 +01:00
|
|
|
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 14:36:41 +01:00
|
|
|
void link_prepare_wakeup(struct tipc_link *l);
|
2014-11-20 10:29:07 +01:00
|
|
|
|
2006-01-02 19:04:38 +01:00
|
|
|
/*
|
|
|
|
* Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
|
|
|
|
*/
|
2011-10-24 22:03:12 +02:00
|
|
|
/**
 * buf_seqno - fetch the sequence number carried by a buffer
 * @buf: buffer whose message header is inspected
 *
 * Returns the value msg_seqno() reports for the message obtained from
 * @buf via buf_msg().
 */
static inline u32 buf_seqno(struct sk_buff *buf)
{
	u32 seqno = msg_seqno(buf_msg(buf));

	return seqno;
}
|
|
|
|
|
2006-01-02 19:04:38 +01:00
|
|
|
/**
 * mod - reduce a value to the modulo 2**16 sequence number space
 * @x: value to reduce
 *
 * Returns @x with everything above the low 16 bits cleared.
 */
static inline u32 mod(u32 x)
{
	const u32 seqno_mask = 0xffffu;

	return x & seqno_mask;
}
|
|
|
|
|
|
|
|
/**
 * less_eq - "left <= right" in modulo 2**16 arithmetic
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns 1 when the forward distance from @left to @right, taken
 * modulo 2**16, is below half of the number space (32768); otherwise 0.
 */
static inline int less_eq(u32 left, u32 right)
{
	const u32 half_range = 0x8000u;	/* 32768 */
	u32 distance = mod(right - left);

	return distance < half_range;
}
|
|
|
|
|
2014-11-26 04:41:52 +01:00
|
|
|
/**
 * more - "left > right" in modulo 2**16 arithmetic
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns 1 exactly when less_eq(@left, @right) does not hold, else 0.
 */
static inline int more(u32 left, u32 right)
{
	if (less_eq(left, right))
		return 0;
	return 1;
}
|
|
|
|
|
2006-01-02 19:04:38 +01:00
|
|
|
/**
 * less - "left < right" in modulo 2**16 arithmetic
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns 1 when less_eq(@left, @right) holds and the two values differ
 * after reduction modulo 2**16; otherwise 0.
 */
static inline int less(u32 left, u32 right)
{
	/* Equal sequence numbers are never strictly ordered */
	if (mod(left) == mod(right))
		return 0;
	return less_eq(left, right) != 0;
}
|
|
|
|
|
|
|
|
/**
 * lesser - pick the earlier of two sequence numbers
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns @left when less_eq(@left, @right) holds, otherwise @right.
 * The chosen argument is returned unmodified (not reduced modulo 2**16).
 */
static inline u32 lesser(u32 left, u32 right)
{
	if (less_eq(left, right))
		return left;
	return right;
}
|
|
|
|
|
2015-02-05 14:36:36 +01:00
|
|
|
static inline u32 link_own_addr(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return msg_prevnode(l->pmsg);
|
|
|
|
}
|
2010-05-11 16:30:11 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Link status checking routines
|
|
|
|
*/
|
2011-12-30 02:58:42 +01:00
|
|
|
static inline int link_working_working(struct tipc_link *l_ptr)
|
2010-05-11 16:30:11 +02:00
|
|
|
{
|
2010-09-22 22:43:57 +02:00
|
|
|
return l_ptr->state == WORKING_WORKING;
|
2010-05-11 16:30:11 +02:00
|
|
|
}
|
|
|
|
|
2011-12-30 02:58:42 +01:00
|
|
|
static inline int link_working_unknown(struct tipc_link *l_ptr)
|
2010-05-11 16:30:11 +02:00
|
|
|
{
|
2010-09-22 22:43:57 +02:00
|
|
|
return l_ptr->state == WORKING_UNKNOWN;
|
2010-05-11 16:30:11 +02:00
|
|
|
}
|
|
|
|
|
2011-12-30 02:58:42 +01:00
|
|
|
static inline int link_reset_unknown(struct tipc_link *l_ptr)
|
2010-05-11 16:30:11 +02:00
|
|
|
{
|
2010-09-22 22:43:57 +02:00
|
|
|
return l_ptr->state == RESET_UNKNOWN;
|
2010-05-11 16:30:11 +02:00
|
|
|
}
|
|
|
|
|
2011-12-30 02:58:42 +01:00
|
|
|
static inline int link_reset_reset(struct tipc_link *l_ptr)
|
2010-05-11 16:30:11 +02:00
|
|
|
{
|
2010-09-22 22:43:57 +02:00
|
|
|
return l_ptr->state == RESET_RESET;
|
2010-05-11 16:30:11 +02:00
|
|
|
}
|
|
|
|
|
2006-01-02 19:04:38 +01:00
|
|
|
#endif
|