Commit 36403dee authored by nbd's avatar nbd
Browse files

ralink: fix ethernet feature TSO not work



* fix TSO features verify on mt7621 firewrt board
* improve tx clean up. no need to access uncached
  memory. also use TX_DTX register instead of
  read tx ring DONE bit
* mt7621 need napi weight 64 to get more performance
* remove netif_receive_skb, after kernel version
  3.7 tcp4_gro_receive can handle tcp checksum.
  on rt2880 use iperf tcp LAN to WAN throughput test.
  with gro 135 Mbits/sec. without gro 80.4Mbits/sec.
Signed-off-by: default avatarmichael lee <igvtee@gmail.com>

git-svn-id: svn://svn.openwrt.org/openwrt/trunk@44118 3c298f89-4303-0410-b956-a3cf2f4a3e73
parent 7488ccfd
......@@ -208,5 +208,5 @@ void fe_set_ethtool_ops(struct net_device *netdev)
fe_ethtool_ops.get_ethtool_stats = fe_get_ethtool_stats;
}
SET_ETHTOOL_OPS(netdev, &fe_ethtool_ops);
netdev->ethtool_ops = &fe_ethtool_ops;
}
......@@ -190,14 +190,6 @@ static inline void fe_get_rxd(struct fe_rx_dma *rxd, struct fe_rx_dma *dma_rxd)
rxd->rxd4 = dma_rxd->rxd4;
}
static inline void fe_get_txd(struct fe_tx_dma *txd, struct fe_tx_dma *dma_txd)
{
txd->txd1 = dma_txd->txd1;
txd->txd2 = dma_txd->txd2;
txd->txd3 = dma_txd->txd3;
txd->txd4 = dma_txd->txd4;
}
static inline void fe_set_txd(struct fe_tx_dma *txd, struct fe_tx_dma *dma_txd)
{
dma_txd->txd1 = txd->txd1;
......@@ -289,17 +281,41 @@ no_rx_mem:
return -ENOMEM;
}
static void fe_txd_unmap(struct device *dev, struct fe_tx_buf *tx_buf)
{
if (tx_buf->flags & FE_TX_FLAGS_SINGLE0) {
dma_unmap_single(dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
} else if (tx_buf->flags & FE_TX_FLAGS_PAGE0) {
dma_unmap_page(dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
}
if (tx_buf->flags & FE_TX_FLAGS_PAGE1)
dma_unmap_page(dev,
dma_unmap_addr(tx_buf, dma_addr1),
dma_unmap_len(tx_buf, dma_len1),
DMA_TO_DEVICE);
tx_buf->flags = 0;
if (tx_buf->skb && (tx_buf->skb != (struct sk_buff *) DMA_DUMMY_DESC)) {
dev_kfree_skb_any(tx_buf->skb);
}
tx_buf->skb = NULL;
}
static void fe_clean_tx(struct fe_priv *priv)
{
int i;
if (priv->tx_skb) {
for (i = 0; i < NUM_DMA_DESC; i++) {
if (priv->tx_skb[i])
dev_kfree_skb_any(priv->tx_skb[i]);
}
kfree(priv->tx_skb);
priv->tx_skb = NULL;
if (priv->tx_buf) {
for (i = 0; i < NUM_DMA_DESC; i++)
fe_txd_unmap(&priv->netdev->dev, &priv->tx_buf[i]);
kfree(priv->tx_buf);
priv->tx_buf = NULL;
}
if (priv->tx_dma) {
......@@ -317,9 +333,9 @@ static int fe_alloc_tx(struct fe_priv *priv)
priv->tx_free_idx = 0;
priv->tx_skb = kcalloc(NUM_DMA_DESC, sizeof(*priv->tx_skb),
priv->tx_buf = kcalloc(NUM_DMA_DESC, sizeof(*priv->tx_buf),
GFP_KERNEL);
if (!priv->tx_skb)
if (!priv->tx_buf)
goto no_tx_mem;
priv->tx_dma = dma_alloc_coherent(&priv->netdev->dev,
......@@ -332,7 +348,6 @@ static int fe_alloc_tx(struct fe_priv *priv)
for (i = 0; i < NUM_DMA_DESC; i++) {
if (priv->soc->tx_dma) {
priv->soc->tx_dma(&priv->tx_dma[i]);
continue;
}
priv->tx_dma[i].txd2 = TX_DMA_DESP2_DEF;
}
......@@ -372,39 +387,19 @@ static void fe_free_dma(struct fe_priv *priv)
netdev_reset_queue(priv->netdev);
}
static inline void txd_unmap_single(struct device *dev, struct fe_tx_dma *txd)
{
if (txd->txd1 && TX_DMA_GET_PLEN0(txd->txd2))
dma_unmap_single(dev, txd->txd1,
TX_DMA_GET_PLEN0(txd->txd2),
DMA_TO_DEVICE);
}
static inline void txd_unmap_page0(struct device *dev, struct fe_tx_dma *txd)
{
if (txd->txd1 && TX_DMA_GET_PLEN0(txd->txd2))
dma_unmap_page(dev, txd->txd1,
TX_DMA_GET_PLEN0(txd->txd2),
DMA_TO_DEVICE);
}
static inline void txd_unmap_page1(struct device *dev, struct fe_tx_dma *txd)
{
if (txd->txd3 && TX_DMA_GET_PLEN1(txd->txd2))
dma_unmap_page(dev, txd->txd3,
TX_DMA_GET_PLEN1(txd->txd2),
DMA_TO_DEVICE);
}
void fe_stats_update(struct fe_priv *priv)
{
struct fe_hw_stats *hwstats = priv->hw_stats;
unsigned int base = fe_reg_table[FE_REG_FE_COUNTER_BASE];
u64 stats;
u64_stats_update_begin(&hwstats->syncp);
if (IS_ENABLED(CONFIG_SOC_MT7621)) {
hwstats->rx_bytes += fe_r32(base);
stats = fe_r32(base + 0x04);
if (stats)
hwstats->rx_bytes += (stats << 32);
hwstats->rx_packets += fe_r32(base + 0x08);
hwstats->rx_overflow += fe_r32(base + 0x10);
hwstats->rx_fcs_errors += fe_r32(base + 0x14);
......@@ -415,6 +410,9 @@ void fe_stats_update(struct fe_priv *priv)
hwstats->tx_skip += fe_r32(base + 0x28);
hwstats->tx_collisions += fe_r32(base + 0x2c);
hwstats->tx_bytes += fe_r32(base + 0x30);
stats = fe_r32(base + 0x34);
if (stats)
hwstats->tx_bytes += (stats << 32);
hwstats->tx_packets += fe_r32(base + 0x38);
} else {
hwstats->tx_bytes += fe_r32(base);
......@@ -525,19 +523,21 @@ static int fe_vlan_rx_kill_vid(struct net_device *dev,
}
static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
int idx)
int idx, int tx_num)
{
struct fe_priv *priv = netdev_priv(dev);
struct skb_frag_struct *frag;
struct fe_tx_dma txd, *ptxd;
struct fe_tx_buf *tx_buf;
dma_addr_t mapped_addr;
unsigned int nr_frags;
u32 def_txd4;
int i, j, unmap_idx, tx_num;
int i, j, k, frag_size, frag_map_size, offset;
tx_buf = &priv->tx_buf[idx];
memset(tx_buf, 0, sizeof(*tx_buf));
memset(&txd, 0, sizeof(txd));
nr_frags = skb_shinfo(skb)->nr_frags;
tx_num = 1 + (nr_frags >> 1);
/* init tx descriptor */
if (priv->soc->tx_dma)
......@@ -546,9 +546,6 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
txd.txd4 = TX_DMA_DESP4_DEF;
def_txd4 = txd.txd4;
/* use dma_unmap_single to free it */
txd.txd4 |= priv->soc->tx_udf_bit;
/* TX Checksum offload */
if (skb->ip_summed == CHECKSUM_PARTIAL)
txd.txd4 |= TX_DMA_CHKSUM;
......@@ -584,78 +581,92 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
txd.txd1 = mapped_addr;
txd.txd2 = TX_DMA_PLEN0(skb_headlen(skb));
tx_buf->flags |= FE_TX_FLAGS_SINGLE0;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
/* TX SG offload */
j = idx;
k = 0;
for (i = 0; i < nr_frags; i++) {
offset = 0;
frag = &skb_shinfo(skb)->frags[i];
mapped_addr = skb_frag_dma_map(&dev->dev, frag, 0,
skb_frag_size(frag), DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
goto err_dma;
if (i & 0x1) {
j = NEXT_TX_DESP_IDX(j);
txd.txd1 = mapped_addr;
txd.txd2 = TX_DMA_PLEN0(frag->size);
txd.txd4 = def_txd4;
} else {
txd.txd3 = mapped_addr;
txd.txd2 |= TX_DMA_PLEN1(frag->size);
if (i != (nr_frags -1)) {
fe_set_txd(&txd, &priv->tx_dma[j]);
memset(&txd, 0, sizeof(txd));
frag_size = skb_frag_size(frag);
while (frag_size > 0) {
frag_map_size = min(frag_size, TX_DMA_BUF_LEN);
mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
frag_map_size, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
goto err_dma;
if (k & 0x1) {
j = NEXT_TX_DESP_IDX(j);
txd.txd1 = mapped_addr;
txd.txd2 = TX_DMA_PLEN0(frag_map_size);
txd.txd4 = def_txd4;
tx_buf = &priv->tx_buf[j];
memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->flags |= FE_TX_FLAGS_PAGE0;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
} else {
txd.txd3 = mapped_addr;
txd.txd2 |= TX_DMA_PLEN1(frag_map_size);
tx_buf->skb = (struct sk_buff *) DMA_DUMMY_DESC;
tx_buf->flags |= FE_TX_FLAGS_PAGE1;
dma_unmap_addr_set(tx_buf, dma_addr1, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len1, frag_map_size);
if (!((i == (nr_frags -1)) &&
(frag_map_size == frag_size))) {
fe_set_txd(&txd, &priv->tx_dma[j]);
memset(&txd, 0, sizeof(txd));
}
}
priv->tx_skb[j] = (struct sk_buff *) DMA_DUMMY_DESC;
frag_size -= frag_map_size;
offset += frag_map_size;
k++;
}
}
/* set last segment */
if (nr_frags & 0x1)
if (k & 0x1)
txd.txd2 |= TX_DMA_LS1;
else
txd.txd2 |= TX_DMA_LS0;
fe_set_txd(&txd, &priv->tx_dma[j]);
/* store skb to cleanup */
priv->tx_skb[j] = skb;
tx_buf->skb = skb;
netdev_sent_queue(dev, skb->len);
skb_tx_timestamp(skb);
wmb();
j = NEXT_TX_DESP_IDX(j);
wmb();
fe_reg_w32(j, FE_REG_TX_CTX_IDX0);
return 0;
err_dma:
/* unmap dma */
ptxd = &priv->tx_dma[idx];
txd_unmap_single(&dev->dev, ptxd);
j = idx;
unmap_idx = i;
for (i = 0; i < unmap_idx; i++) {
if (i & 0x1) {
j = NEXT_TX_DESP_IDX(j);
ptxd = &priv->tx_dma[j];
txd_unmap_page0(&dev->dev, ptxd);
} else {
txd_unmap_page1(&dev->dev, ptxd);
}
}
err_out:
/* reinit descriptors and skb */
j = idx;
for (i = 0; i < tx_num; i++) {
priv->tx_dma[j].txd2 = TX_DMA_DESP2_DEF;
priv->tx_skb[j] = NULL;
ptxd = &priv->tx_dma[j];
tx_buf = &priv->tx_buf[j];
/* unmap dma */
fe_txd_unmap(&dev->dev, tx_buf);
ptxd->txd2 = TX_DMA_DESP2_DEF;
j = NEXT_TX_DESP_IDX(j);
}
wmb();
err_out:
return -1;
}
......@@ -695,6 +706,24 @@ static inline u32 fe_empty_txd(struct fe_priv *priv, u32 tx_fill_idx)
(NUM_DMA_DESC - 1)));
}
static inline int fe_cal_txd_req(struct sk_buff *skb)
{
int i, nfrags;
struct skb_frag_struct *frag;
nfrags = 1;
if (skb_is_gso(skb)) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
frag = &skb_shinfo(skb)->frags[i];
nfrags += DIV_ROUND_UP(frag->size, TX_DMA_BUF_LEN);
}
} else {
nfrags += skb_shinfo(skb)->nr_frags;
}
return DIV_ROUND_UP(nfrags, 2);
}
static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct fe_priv *priv = netdev_priv(dev);
......@@ -708,7 +737,7 @@ static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
tx_num = 1 + (skb_shinfo(skb)->nr_frags >> 1);
tx_num = fe_cal_txd_req(skb);
tx = fe_reg_r32(FE_REG_TX_CTX_IDX0);
if (unlikely(fe_empty_txd(priv, tx) <= tx_num))
{
......@@ -718,9 +747,7 @@ static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_BUSY;
}
if (fe_tx_map_dma(skb, dev, tx) < 0) {
kfree_skb(skb);
if (fe_tx_map_dma(skb, dev, tx, tx_num) < 0) {
stats->tx_dropped++;
} else {
stats->tx_packets++;
......@@ -745,7 +772,7 @@ static inline void fe_rx_vlan(struct sk_buff *skb)
}
static int fe_poll_rx(struct napi_struct *napi, int budget,
struct fe_priv *priv)
struct fe_priv *priv, u32 rx_intr)
{
struct net_device *netdev = priv->netdev;
struct net_device_stats *stats = &netdev->stats;
......@@ -767,6 +794,7 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
pad = 0;
else
pad = NET_IP_ALIGN;
while (done < budget) {
unsigned int pktlen;
dma_addr_t dma_addr;
......@@ -818,10 +846,7 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
stats->rx_packets++;
stats->rx_bytes += pktlen;
if (skb->ip_summed == CHECKSUM_NONE)
netif_receive_skb(skb);
else
napi_gro_receive(napi, skb);
napi_gro_receive(napi, skb);
priv->rx_data[idx] = new_data;
rxd->rxd1 = (unsigned int) dma_addr;
......@@ -837,40 +862,39 @@ release_desc:
done++;
}
if (done < budget)
fe_reg_w32(rx_intr, FE_REG_FE_INT_STATUS);
return done;
}
static int fe_poll_tx(struct fe_priv *priv, int budget)
static int fe_poll_tx(struct fe_priv *priv, int budget, u32 tx_intr)
{
struct net_device *netdev = priv->netdev;
struct device *dev = &netdev->dev;
unsigned int bytes_compl = 0;
struct sk_buff *skb;
struct fe_tx_dma txd;
int done = 0, idx;
u32 udf_bit = priv->soc->tx_udf_bit;
struct fe_tx_buf *tx_buf;
int done = 0;
u32 idx, hwidx;
hwidx = fe_reg_r32(FE_REG_TX_DTX_IDX0);
idx = priv->tx_free_idx;
while (done < budget) {
fe_get_txd(&txd, &priv->tx_dma[idx]);
skb = priv->tx_skb[idx];
if (!(txd.txd2 & TX_DMA_DONE) || !skb)
break;
txd_unmap_page1(dev, &txd);
txpoll_again:
while ((idx != hwidx) && budget) {
tx_buf = &priv->tx_buf[idx];
skb = tx_buf->skb;
if (txd.txd4 & udf_bit)
txd_unmap_single(dev, &txd);
else
txd_unmap_page0(dev, &txd);
if (!skb)
break;
if (skb != (struct sk_buff *) DMA_DUMMY_DESC) {
bytes_compl += skb->len;
dev_kfree_skb_any(skb);
done++;
budget--;
}
priv->tx_skb[idx] = NULL;
fe_txd_unmap(dev, tx_buf);
idx = NEXT_TX_DESP_IDX(idx);
}
priv->tx_free_idx = idx;
......@@ -878,6 +902,13 @@ static int fe_poll_tx(struct fe_priv *priv, int budget)
if (!done)
return 0;
if (done < budget) {
hwidx = fe_reg_r32(FE_REG_TX_DTX_IDX0);
if (idx != hwidx)
goto txpoll_again;
fe_reg_w32(tx_intr, FE_REG_FE_INT_STATUS);
}
netdev_completed_queue(netdev, done, bytes_compl);
if (unlikely(netif_queue_stopped(netdev) &&
netif_carrier_ok(netdev))) {
......@@ -900,21 +931,11 @@ static int fe_poll(struct napi_struct *napi, int budget)
rx_intr = priv->soc->rx_int;
tx_done = rx_done = 0;
poll_again:
if (status & tx_intr) {
tx_done += fe_poll_tx(priv, budget - tx_done);
if (tx_done < budget) {
fe_reg_w32(tx_intr, FE_REG_FE_INT_STATUS);
}
status = fe_reg_r32(FE_REG_FE_INT_STATUS);
}
if (status & tx_intr)
tx_done = fe_poll_tx(priv, budget, tx_intr);
if (status & rx_intr) {
rx_done += fe_poll_rx(napi, budget - rx_done, priv);
if (rx_done < budget) {
fe_reg_w32(rx_intr, FE_REG_FE_INT_STATUS);
}
}
if (status & rx_intr)
rx_done = fe_poll_rx(napi, budget, priv, rx_intr);
if (unlikely(hwstat && (status & FE_CNT_GDM_AF))) {
if (spin_trylock(&hwstat->stats_lock)) {
......@@ -933,13 +954,14 @@ poll_again:
if ((tx_done < budget) && (rx_done < budget)) {
status = fe_reg_r32(FE_REG_FE_INT_STATUS);
if (status & (tx_intr | rx_intr )) {
if (status & (tx_intr | rx_intr ))
goto poll_again;
}
napi_complete(napi);
fe_int_enable(tx_intr | rx_intr);
}
poll_again:
return rx_done;
}
......@@ -984,8 +1006,10 @@ static irqreturn_t fe_handle_irq(int irq, void *dev)
int_mask = (priv->soc->rx_int | priv->soc->tx_int);
if (likely(status & int_mask)) {
fe_int_disable(int_mask);
napi_schedule(&priv->rx_napi);
if (likely(napi_schedule_prep(&priv->rx_napi))) {
fe_int_disable(int_mask);
__napi_schedule(&priv->rx_napi);
}
} else {
fe_reg_w32(status, FE_REG_FE_INT_STATUS);
}
......@@ -1086,6 +1110,9 @@ static int fe_hw_init(struct net_device *dev)
else
fe_hw_set_macaddr(priv, dev->dev_addr);
/* disable delay interrupt */
fe_reg_w32(0, FE_REG_DLY_INT_CFG);
fe_int_disable(priv->soc->tx_int | priv->soc->rx_int);
/* frame engine will push VLAN tag regarding to VIDX feild in Tx desc. */
......@@ -1373,7 +1400,7 @@ static int fe_probe(struct platform_device *pdev)
struct net_device *netdev;
struct fe_priv *priv;
struct clk *sysclk;
int err;
int err, napi_weight;
device_reset(&pdev->dev);
......@@ -1449,7 +1476,10 @@ static int fe_probe(struct platform_device *pdev)
}
INIT_WORK(&priv->pending_work, fe_pending_work);
netif_napi_add(netdev, &priv->rx_napi, fe_poll, 32);
napi_weight = 32;
if (priv->flags & FE_FLAG_NAPI_WEIGHT)
napi_weight = 64;
netif_napi_add(netdev, &priv->rx_napi, fe_poll, napi_weight);
fe_set_ethtool_ops(netdev);
err = register_netdev(netdev);
......
......@@ -51,7 +51,7 @@ enum fe_work_flag {
FE_FLAG_MAX
};
#define FE_DRV_VERSION "0.1.1"
#define FE_DRV_VERSION "0.1.2"
/* power of 2 to let NEXT_TX_DESP_IDX work */
#ifdef CONFIG_SOC_MT7621
......@@ -322,11 +322,12 @@ struct fe_rx_dma {
unsigned int rxd4;
} __packed __aligned(4);
#define TX_DMA_PLEN0_MASK ((0x3fff) << 16)
#define TX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
#define TX_DMA_PLEN1(_x) ((_x) & 0x3fff)
#define TX_DMA_GET_PLEN0(_x) (((_x) >> 16 ) & 0x3fff)
#define TX_DMA_GET_PLEN1(_x) ((_x) & 0x3fff)
#define TX_DMA_BUF_LEN 0x3fff
#define TX_DMA_PLEN0_MASK (TX_DMA_BUF_LEN << 16)
#define TX_DMA_PLEN0(_x) (((_x) & TX_DMA_BUF_LEN) << 16)
#define TX_DMA_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN)
#define TX_DMA_GET_PLEN0(_x) (((_x) >> 16 ) & TX_DMA_BUF_LEN)
#define TX_DMA_GET_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN)
#define TX_DMA_LS1 BIT(14)
#define TX_DMA_LS0 BIT(30)
#define TX_DMA_DONE BIT(31)
......@@ -401,7 +402,6 @@ struct fe_soc_data
u32 rx_int;
u32 tx_int;
u32 checksum_bit;
u32 tx_udf_bit;
};
#define FE_FLAG_PADDING_64B BIT(0)
......@@ -410,6 +410,7 @@ struct fe_soc_data
#define FE_FLAG_RX_2B_OFFSET BIT(3)
#define FE_FLAG_RX_SG_DMA BIT(4)
#define FE_FLAG_RX_VLAN_CTAG BIT(5)
#define FE_FLAG_NAPI_WEIGHT BIT(6)
#define FE_STAT_REG_DECLARE \
_FE(tx_bytes) \
......@@ -434,6 +435,22 @@ FE_STAT_REG_DECLARE
#undef _FE
};
enum fe_tx_flags {