diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 113bfc468a4d..b118744d3382 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -164,6 +164,9 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 +/* Maximal size of an A-MSDU that can be transported in a HT BA session */ +#define IEEE80211_MAX_MPDU_LEN_HT_BA 4095 + /* Maximal size of an A-MSDU */ #define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 #define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5f4b4c773a92..a3ee76559791 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -713,6 +713,7 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * * These flags are used in tx_info->control.flags. */ @@ -720,6 +721,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0), IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), + IEEE80211_TX_CTRL_AMSDU = BIT(3), }; /* @@ -1746,6 +1748,7 @@ struct ieee80211_sta_rates { * Both additional HT limits must be enforced by the low level driver. * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { @@ -1767,6 +1770,7 @@ struct ieee80211_sta { u8 max_amsdu_subframes; u16 max_amsdu_len; bool support_p2p_ps; + u16 max_rc_amsdu_len; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; @@ -1983,6 +1987,15 @@ struct ieee80211_txq { * @IEEE80211_HW_USES_RSS: The device uses RSS and thus requires parallel RX, * which implies using per-CPU station statistics. * + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated + * A-MSDU frames. Requires software tx queueing and fast-xmit support. + * When not using minstrel/minstrel_ht rate control, the driver must + * limit the maximum A-MSDU size based on the current tx rate by setting + * max_rc_amsdu_len in struct ieee80211_sta. + * + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list + * skbs, needed for zero-copy software A-MSDU. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2021,6 +2034,8 @@ enum ieee80211_hw_flags { IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, IEEE80211_HW_SUPPORTS_REORDERING_BUFFER, IEEE80211_HW_USES_RSS, + IEEE80211_HW_TX_AMSDU, + IEEE80211_HW_TX_FRAG_LIST, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2093,6 +2108,9 @@ enum ieee80211_hw_flags { * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 * build 002 Jun 18 2012). * + * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum + * of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list. + * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) * @@ -2147,6 +2165,7 @@ struct ieee80211_hw { u8 max_rate_tries; u8 max_rx_aggregation_subframes; u8 max_tx_aggregation_subframes; + u8 max_tx_fragments; u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 4932e9f243a2..42fa81031dfa 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, size_t len) { struct tid_ampdu_tx *tid_tx; + struct ieee80211_txq *txq; u16 capab, tid; u8 buf_size; bool amsdu; @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); + txq = sta->sta.txq[tid]; + if (!amsdu && txq) + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 52ed2afc408d..b251b2f7f8dd 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -128,6 +128,8 @@ static const char *hw_flag_names[] = { FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(SUPPORTS_REORDERING_BUFFER), FLAG(USES_RSS), + FLAG(TX_AMSDU), + FLAG(TX_FRAG_LIST), #undef FLAG }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6243109979ed..40c1d343992c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -802,6 +802,7 @@ struct mac80211_qos_map { enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, + IEEE80211_TXQ_NO_AMSDU, }; struct txq_info { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cf2aca0cc200..960e13d8ed30 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -416,6 +416,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } + sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 597c8fe672a3..4fa2842ddb25 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1324,6 +1324,10 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, out: spin_unlock_bh(&txqi->queue.lock); + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) + skb_linearize(skb); + return skb; } EXPORT_SYMBOL(ieee80211_tx_dequeue); @@ -2802,6 +2806,154 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) kfree_rcu(fast_tx, rcu_head); } +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, + struct sk_buff *skb, int headroom, + int *subframe_len) +{ + int amsdu_len = *subframe_len + sizeof(struct ethhdr); + int padding = (4 - amsdu_len) & 3; + + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + wiphy_debug(local->hw.wiphy, + "failed to reallocate TX buffer\n"); + return false; + } + } + + if (padding) { + *subframe_len += padding; + memset(skb_put(skb, padding), 0, padding); + } + + return true; +} + +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ethhdr amsdu_hdr; + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); + int subframe_len = skb->len - hdr_len; + void *data; + u8 *qc; + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) + return false; + + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) + return true; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + &subframe_len)) + return false; + + amsdu_hdr.h_proto = cpu_to_be16(subframe_len); + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); + + data = skb_push(skb, sizeof(amsdu_hdr)); + memmove(data, data + sizeof(amsdu_hdr), hdr_len); + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + + hdr = data; + qc = ieee80211_get_qos_ctl(hdr); + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + info->control.flags |= IEEE80211_TX_CTRL_AMSDU; + + return true; +} + +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + struct sk_buff **frag_tail, *head; + int subframe_len = skb->len - ETH_ALEN; + u8 max_subframes = sta->sta.max_amsdu_subframes; + int max_frags = local->hw.max_tx_fragments; + int max_amsdu_len = sta->sta.max_amsdu_len; + __be16 len; + void *data; + bool ret = false; + int n = 1, nfrags; + + if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) + return false; + + if (!txq) + return false; + + txqi = to_txq_info(txq); + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) + return false; + + if (sta->sta.max_rc_amsdu_len) + max_amsdu_len = min_t(int, max_amsdu_len, + sta->sta.max_rc_amsdu_len); + + spin_lock_bh(&txqi->queue.lock); + + head = skb_peek_tail(&txqi->queue); + if (!head) + goto out; + + if (skb->len + head->len > max_amsdu_len) + goto out; + + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + + nfrags = 1 + skb_shinfo(skb)->nr_frags; + nfrags += 1 + skb_shinfo(head)->nr_frags; + frag_tail = &skb_shinfo(head)->frag_list; + while (*frag_tail) { + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags; + frag_tail = &(*frag_tail)->next; + n++; + } + + if (max_subframes && n > max_subframes) + goto out; + + if (max_frags && nfrags > max_frags) + goto out; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, + &subframe_len)) + goto out; + + ret = true; + data = skb_push(skb, ETH_ALEN + 2); + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); + + data += 2 * ETH_ALEN; + len = cpu_to_be16(subframe_len); + memcpy(data, &len, 2); + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + + head->len += skb->len; + head->data_len += skb->len; + *frag_tail = skb; + +out: + spin_unlock_bh(&txqi->queue.lock); + + return ret; +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, @@ -2856,6 +3008,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ieee80211_tx_stats(dev, skb->len + extra_head); + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) + return true; + /* will not be crypto-handled beyond what we do here, so use false * as the may-encrypt argument for the resize to not account for * more room than we already have in 'extra_head'