mirror of
https://github.com/openwrt/openwrt.git
synced 2026-06-17 12:40:16 +04:00
d22ceb8d24
Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth driver, leveraging the 8 dedicated LRO hardware queues of the EN7581/AN7583 SoC, which are mapped to RX queues 24–31.

The LRO hardware offload does not support Scatter-Gather (SG), so the page_pool allocation order must be increased to 2 for RX queues 24–31 (the LRO queues).

A performance comparison between GRO and hardware LRO was carried out using a 10 Gbps NIC:
- GRO: ~2.7 Gbps
- LRO: ~8.1 Gbps

Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://github.com/openwrt/openwrt/pull/23530
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
375 lines
12 KiB
Diff
375 lines
12 KiB
Diff
From 598e1ddfe85ad0f4778eeadd5d878209dd931280 Mon Sep 17 00:00:00 2001
|
|
Message-ID: <598e1ddfe85ad0f4778eeadd5d878209dd931280.1779112628.git.lorenzo@kernel.org>
|
|
From: Lorenzo Bianconi <lorenzo@kernel.org>
|
|
Date: Thu, 14 May 2026 20:25:19 +0200
|
|
Subject: [PATCH] net: airoha: Implement LRO TCP support
|
|
|
|
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
|
|
---
|
|
drivers/net/ethernet/airoha/airoha_eth.c | 178 ++++++++++++++++++++--
|
|
drivers/net/ethernet/airoha/airoha_eth.h | 23 +++
|
|
drivers/net/ethernet/airoha/airoha_regs.h | 22 ++-
|
|
3 files changed, 208 insertions(+), 15 deletions(-)
|
|
|
|
--- a/drivers/net/ethernet/airoha/airoha_eth.c
|
|
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
|
|
@@ -13,6 +13,7 @@
|
|
#include <net/dst_metadata.h>
|
|
#include <net/page_pool/helpers.h>
|
|
#include <net/pkt_cls.h>
|
|
+#include <net/tcp.h>
|
|
#include <uapi/linux/ppp_defs.h>
|
|
|
|
#include "airoha_regs.h"
|
|
@@ -439,6 +440,48 @@ static void airoha_fe_crsn_qsel_init(str
|
|
CDM_CRSN_QSEL_Q1));
|
|
}
|
|
|
|
+static void airoha_fe_lro_init_rx_queue(struct airoha_eth *eth, int qdma_id,
|
|
+ int lro_queue_index, int qid,
|
|
+ int buf_size)
|
|
+{
|
|
+ int id = qdma_id + 1;
|
|
+
|
|
+ airoha_fe_rmw(eth, REG_CDM_LRO_LIMIT(id),
|
|
+ CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK,
|
|
+ FIELD_PREP(CDM_LRO_AGG_SIZE_MASK, buf_size) |
|
|
+ FIELD_PREP(CDM_LRO_AGG_NUM_MASK,
|
|
+ AIROHA_RXQ_LRO_MAX_AGG_COUNT));
|
|
+ airoha_fe_rmw(eth, REG_CDM_LRO_AGE_TIME(id),
|
|
+ CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK,
|
|
+ FIELD_PREP(CDM_LRO_AGE_TIME_MASK,
|
|
+ AIROHA_RXQ_LRO_MAX_AGE_TIME) |
|
|
+ FIELD_PREP(CDM_LRO_AGG_TIME_MASK,
|
|
+ AIROHA_RXQ_LRO_MAX_AGG_TIME));
|
|
+ airoha_fe_rmw(eth, REG_CDM_LRO_RXQ(id, lro_queue_index),
|
|
+ LRO_RXQ_MASK(lro_queue_index),
|
|
+ __field_prep(LRO_RXQ_MASK(lro_queue_index), qid));
|
|
+ airoha_fe_set(eth, REG_CDM_LRO_EN(id), BIT(lro_queue_index));
|
|
+}
|
|
+
|
|
+static void airoha_fe_lro_disable(struct airoha_eth *eth, int qdma_id)
|
|
+{
|
|
+ int i, id = qdma_id + 1;
|
|
+
|
|
+ airoha_fe_clear(eth, REG_CDM_LRO_EN(id), LRO_RXQ_EN_MASK);
|
|
+ airoha_fe_clear(eth, REG_CDM_LRO_LIMIT(id),
|
|
+ CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK);
|
|
+ airoha_fe_clear(eth, REG_CDM_LRO_AGE_TIME(id),
|
|
+ CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK);
|
|
+ for (i = 0; i < AIROHA_MAX_NUM_LRO_QUEUES; i++)
|
|
+ airoha_fe_clear(eth, REG_CDM_LRO_RXQ(id, i), LRO_RXQ_MASK(i));
|
|
+}
|
|
+
|
|
+static bool airoha_fe_lro_is_enabled(struct airoha_eth *eth, int qdma_id)
|
|
+{
|
|
+ return airoha_fe_get(eth, REG_CDM_LRO_EN(qdma_id + 1),
|
|
+ LRO_RXQ_EN_MASK);
|
|
+}
|
|
+
|
|
static int airoha_fe_init(struct airoha_eth *eth)
|
|
{
|
|
airoha_fe_maccr_init(eth);
|
|
@@ -603,6 +646,85 @@ static int airoha_qdma_get_gdm_port(stru
|
|
return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
|
|
}
|
|
|
|
+static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
|
|
+ struct airoha_qdma_desc *desc)
|
|
+{
|
|
+ u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
|
|
+ u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
|
|
+ u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
|
|
+ u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
|
|
+ struct sk_buff *skb = q->skb;
|
|
+ u32 len, th_off, tcp_ack_seq;
|
|
+ u16 tcp_win, l2_len;
|
|
+ struct tcphdr *th;
|
|
+ bool ipv4, ipv6;
|
|
+
|
|
+ if (FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2) <= 1)
|
|
+ return 0;
|
|
+
|
|
+ ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
|
|
+ ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
|
|
+ if (!ipv4 && !ipv6)
|
|
+ return -EOPNOTSUPP;
|
|
+
|
|
+ l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
|
|
+ len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
|
|
+ if (ipv4) {
|
|
+ struct iphdr *iph;
|
|
+
|
|
+ if (!pskb_may_pull(skb, l2_len + sizeof(*iph)))
|
|
+ return -EINVAL;
|
|
+
|
|
+ iph = (struct iphdr *)(skb->data + l2_len);
|
|
+ if (iph->protocol != IPPROTO_TCP)
|
|
+ return -EOPNOTSUPP;
|
|
+
|
|
+ iph->tot_len = cpu_to_be16(len - l2_len);
|
|
+ iph->check = 0;
|
|
+ iph->check = ip_fast_csum((void *)iph, iph->ihl);
|
|
+ th_off = l2_len + (iph->ihl << 2);
|
|
+ } else {
|
|
+ struct ipv6hdr *ip6h;
|
|
+
|
|
+ if (!pskb_may_pull(skb, l2_len + sizeof(*ip6h)))
|
|
+ return -EINVAL;
|
|
+
|
|
+ ip6h = (struct ipv6hdr *)(skb->data + l2_len);
|
|
+ if (ip6h->nexthdr != NEXTHDR_TCP)
|
|
+ return -EOPNOTSUPP;
|
|
+
|
|
+ ip6h->payload_len = cpu_to_be16(len - l2_len - sizeof(*ip6h));
|
|
+ th_off = l2_len + sizeof(*ip6h);
|
|
+ }
|
|
+
|
|
+ tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
|
|
+ tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
|
|
+
|
|
+ if (!pskb_may_pull(skb, th_off + sizeof(*th)))
|
|
+ return -EINVAL;
|
|
+
|
|
+ th = (struct tcphdr *)(skb->data + th_off);
|
|
+ th->ack_seq = cpu_to_be32(tcp_ack_seq);
|
|
+ th->window = cpu_to_be16(tcp_win);
|
|
+
|
|
+ /* Check tcp timestamp option */
|
|
+ if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
|
|
+ __be32 *topt = (__be32 *)(th + 1);
|
|
+
|
|
+ if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |
|
|
+ (TCPOPT_NOP << 16) |
|
|
+ (TCPOPT_TIMESTAMP << 8) |
|
|
+ TCPOLEN_TIMESTAMP)) {
|
|
+ __le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
|
|
+
|
|
+ put_unaligned_be32(le32_to_cpu(tcp_ts_reply),
|
|
+ topt + 2);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
|
|
{
|
|
enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);
|
|
@@ -650,11 +772,15 @@ static int airoha_qdma_rx_process(struct
|
|
|
|
skb_reserve(q->skb, AIROHA_RX_HEADROOM);
|
|
__skb_put(q->skb, len);
|
|
- skb_mark_for_recycle(q->skb);
|
|
q->skb->dev = port->dev;
|
|
- q->skb->protocol = eth_type_trans(q->skb, port->dev);
|
|
q->skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
skb_record_rx_queue(q->skb, qid);
|
|
+
|
|
+ if (airoha_qdma_lro_rx_process(q, desc) < 0)
|
|
+ goto free_frag;
|
|
+
|
|
+ q->skb->protocol = eth_type_trans(q->skb, port->dev);
|
|
+ skb_mark_for_recycle(q->skb);
|
|
} else { /* scattered frame */
|
|
struct skb_shared_info *shinfo = skb_shinfo(q->skb);
|
|
int nr_frags = shinfo->nr_frags;
|
|
@@ -743,12 +869,13 @@ static int airoha_qdma_rx_napi_poll(stru
|
|
static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
|
|
struct airoha_qdma *qdma, int ndesc)
|
|
{
|
|
+ int pp_order = airoha_qdma_is_lro_queue(q) ? AIROHA_LRO_PAGE_ORDER : 0;
|
|
const struct page_pool_params pp_params = {
|
|
- .order = 0,
|
|
+ .order = pp_order,
|
|
.pool_size = 256,
|
|
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
|
|
.dma_dir = DMA_FROM_DEVICE,
|
|
- .max_len = PAGE_SIZE,
|
|
+ .max_len = PAGE_SIZE << pp_order,
|
|
.nid = NUMA_NO_NODE,
|
|
.dev = qdma->eth->dev,
|
|
.napi = &q->napi,
|
|
@@ -757,7 +884,8 @@ static int airoha_qdma_init_rx_queue(str
|
|
int qid = q - &qdma->q_rx[0], thr;
|
|
dma_addr_t dma_addr;
|
|
|
|
- q->buf_size = PAGE_SIZE / 2;
|
|
+ q->buf_size = airoha_qdma_is_lro_queue(q) ? pp_params.max_len
|
|
+ : pp_params.max_len / 2;
|
|
q->qdma = qdma;
|
|
|
|
q->entry = devm_kzalloc(eth->dev, ndesc * sizeof(*q->entry),
|
|
@@ -2034,6 +2162,67 @@ int airoha_get_fe_port(struct airoha_gdm
|
|
}
|
|
}
|
|
|
|
+static int airoha_dev_set_features(struct net_device *dev,
|
|
+ netdev_features_t features)
|
|
+{
|
|
+ netdev_features_t diff = dev->features ^ features;
|
|
+ struct airoha_gdm_port *port = netdev_priv(dev);
|
|
+ struct airoha_qdma *qdma = port->qdma;
|
|
+ struct airoha_eth *eth = qdma->eth;
|
|
+ int qdma_id = qdma - &eth->qdma[0];
|
|
+ int i;
|
|
+
|
|
+ if (!(diff & NETIF_F_LRO))
|
|
+ return 0;
|
|
+
|
|
+ if (netif_running(dev))
|
|
+ return -EBUSY;
|
|
+
|
|
+ /* reset LRO configuration */
|
|
+ if (features & NETIF_F_LRO) {
|
|
+ int lro_queue_index = 0;
|
|
+
|
|
+ if (airoha_fe_lro_is_enabled(eth, qdma_id))
|
|
+ return 0;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
|
|
+ struct airoha_queue *q = &qdma->q_rx[i];
|
|
+ u32 size;
|
|
+
|
|
+ if (!q->ndesc)
|
|
+ continue;
|
|
+
|
|
+ if (!airoha_qdma_is_lro_queue(q))
|
|
+ continue;
|
|
+
|
|
+ size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
|
|
+ size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
|
|
+ airoha_fe_lro_init_rx_queue(eth, qdma_id,
|
|
+ lro_queue_index, i, size);
|
|
+ lro_queue_index++;
|
|
+ }
|
|
+ } else {
|
|
+ for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
|
|
+ struct airoha_gdm_port *p = eth->ports[i];
|
|
+
|
|
+ if (!p)
|
|
+ continue;
|
|
+
|
|
+ if (p->qdma != qdma)
|
|
+ continue;
|
|
+
|
|
+ if (p->dev == dev)
|
|
+ continue;
|
|
+
|
|
+ if (p->dev->features & NETIF_F_LRO)
|
|
+ return 0;
|
|
+ }
|
|
+ airoha_fe_lro_disable(eth, qdma_id);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
|
|
struct net_device *dev)
|
|
{
|
|
@@ -2933,6 +3122,7 @@ static const struct net_device_ops airoh
|
|
.ndo_stop = airoha_dev_stop,
|
|
.ndo_change_mtu = airoha_dev_change_mtu,
|
|
.ndo_select_queue = airoha_dev_select_queue,
|
|
+ .ndo_set_features = airoha_dev_set_features,
|
|
.ndo_start_xmit = airoha_dev_xmit,
|
|
.ndo_get_stats64 = airoha_dev_get_stats64,
|
|
.ndo_set_mac_address = airoha_dev_set_macaddr,
|
|
@@ -3150,12 +3340,9 @@ static int airoha_alloc_gdm_port(struct
|
|
dev->ethtool_ops = &airoha_ethtool_ops;
|
|
dev->max_mtu = AIROHA_MAX_MTU;
|
|
dev->watchdog_timeo = 5 * HZ;
|
|
- dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
|
|
- NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |
|
|
- NETIF_F_SG | NETIF_F_TSO |
|
|
- NETIF_F_HW_TC;
|
|
- dev->features |= dev->hw_features;
|
|
- dev->vlan_features = dev->hw_features;
|
|
+ dev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
|
|
+ dev->features |= AIROHA_HW_FEATURES;
|
|
+ dev->vlan_features = AIROHA_HW_FEATURES;
|
|
dev->dev.of_node = np;
|
|
SET_NETDEV_DEV(dev, eth->dev);
|
|
|
|
--- a/drivers/net/ethernet/airoha/airoha_eth.h
|
|
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
|
|
@@ -43,6 +43,18 @@
|
|
(_n) == 15 ? 128 : \
|
|
(_n) == 0 ? 1024 : 16)
|
|
|
|
+#define AIROHA_LRO_PAGE_ORDER 2
|
|
+#define AIROHA_MAX_NUM_LRO_QUEUES 8
|
|
+#define AIROHA_RXQ_LRO_EN_MASK 0xff000000
|
|
+#define AIROHA_RXQ_LRO_MAX_AGG_COUNT 64
|
|
+#define AIROHA_RXQ_LRO_MAX_AGG_TIME 100
|
|
+#define AIROHA_RXQ_LRO_MAX_AGE_TIME 2000 /* 1ms */
|
|
+
|
|
+#define AIROHA_HW_FEATURES \
|
|
+ (NETIF_F_IP_CSUM | NETIF_F_RXCSUM | \
|
|
+ NETIF_F_TSO6 | NETIF_F_IPV6_CSUM | \
|
|
+ NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
|
|
+
|
|
#define PSE_RSV_PAGES 128
|
|
#define PSE_QUEUE_RSV_PAGES 64
|
|
|
|
@@ -666,6 +678,18 @@ static inline bool airoha_is_7583(struct
|
|
return eth->soc->version == 0x7583;
|
|
}
|
|
|
|
+static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
|
|
+{
|
|
+ struct airoha_qdma *qdma = q->qdma;
|
|
+ int qid = q - &qdma->q_rx[0];
|
|
+
|
|
+ /* EN7581 SoC supports at most 8 LRO rx queues */
|
|
+ BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
|
|
+ AIROHA_MAX_NUM_LRO_QUEUES);
|
|
+
|
|
+ return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
|
|
+}
|
|
+
|
|
int airoha_get_fe_port(struct airoha_gdm_port *port);
|
|
bool airoha_is_valid_gdm_port(struct airoha_eth *eth,
|
|
struct airoha_gdm_port *port);
|
|
--- a/drivers/net/ethernet/airoha/airoha_regs.h
|
|
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
|
|
@@ -122,6 +122,20 @@
|
|
#define CDM_CRSN_QSEL_REASON_MASK(_n) \
|
|
GENMASK(4 + (((_n) % 4) << 3), (((_n) % 4) << 3))
|
|
|
|
+#define REG_CDM_LRO_RXQ(_n, _m) (CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
|
|
+#define LRO_RXQ_MASK(_n) GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
|
|
+
|
|
+#define REG_CDM_LRO_EN(_n) (CDM_BASE(_n) + 0x80)
|
|
+#define LRO_RXQ_EN_MASK GENMASK(7, 0)
|
|
+
|
|
+#define REG_CDM_LRO_LIMIT(_n) (CDM_BASE(_n) + 0x84)
|
|
+#define CDM_LRO_AGG_NUM_MASK GENMASK(23, 16)
|
|
+#define CDM_LRO_AGG_SIZE_MASK GENMASK(15, 0)
|
|
+
|
|
+#define REG_CDM_LRO_AGE_TIME(_n) (CDM_BASE(_n) + 0x88)
|
|
+#define CDM_LRO_AGE_TIME_MASK GENMASK(31, 16)
|
|
+#define CDM_LRO_AGG_TIME_MASK GENMASK(15, 0)
|
|
+
|
|
#define REG_GDM_FWD_CFG(_n) GDM_BASE(_n)
|
|
#define GDM_PAD_EN_MASK BIT(28)
|
|
#define GDM_DROP_CRC_ERR_MASK BIT(23)
|
|
@@ -895,9 +909,15 @@
|
|
#define QDMA_ETH_RXMSG_SPORT_MASK GENMASK(25, 21)
|
|
#define QDMA_ETH_RXMSG_CRSN_MASK GENMASK(20, 16)
|
|
#define QDMA_ETH_RXMSG_PPE_ENTRY_MASK GENMASK(15, 0)
|
|
+/* RX MSG2 */
|
|
+#define QDMA_ETH_RXMSG_AGG_COUNT_MASK GENMASK(31, 24)
|
|
+#define QDMA_ETH_RXMSG_L2_LEN_MASK GENMASK(6, 0)
|
|
+/* RX MSG3 */
|
|
+#define QDMA_ETH_RXMSG_AGG_LEN_MASK GENMASK(31, 16)
|
|
+#define QDMA_ETH_RXMSG_TCP_WIN_MASK GENMASK(15, 0)
|
|
|
|
struct airoha_qdma_desc {
|
|
- __le32 rsv;
|
|
+ __le32 tcp_ts_reply;
|
|
__le32 ctrl;
|
|
__le32 addr;
|
|
__le32 data;
|