Merge branch 'net-use-netdev-lock-to-protect-napi'

Jakub Kicinski says:

====================
net: use netdev->lock to protect NAPI

We recently added a lock member to struct net_device, with a vague
plan to start using it to protect netdev-local state, removing
the need to take rtnl_lock for new configuration APIs.

Lay some groundwork and use this lock for protecting NAPI APIs.

v1: https://lore.kernel.org/20250114035118.110297-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250115035319.559603-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-01-15 19:13:36 -08:00
11 changed files with 420 additions and 121 deletions
+9 -2
View File
@@ -462,7 +462,7 @@ static void pcnet32_netif_start(struct net_device *dev)
val = lp->a->read_csr(ioaddr, CSR3);
val &= 0x00ff;
lp->a->write_csr(ioaddr, CSR3, val);
napi_enable(&lp->napi);
napi_enable_locked(&lp->napi);
}
/*
@@ -889,6 +889,7 @@ static int pcnet32_set_ringparam(struct net_device *dev,
if (netif_running(dev))
pcnet32_netif_stop(dev);
netdev_lock(dev);
spin_lock_irqsave(&lp->lock, flags);
lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); /* stop the chip */
@@ -920,6 +921,7 @@ static int pcnet32_set_ringparam(struct net_device *dev,
}
spin_unlock_irqrestore(&lp->lock, flags);
netdev_unlock(dev);
netif_info(lp, drv, dev, "Ring Param Settings: RX: %d, TX: %d\n",
lp->rx_ring_size, lp->tx_ring_size);
@@ -985,6 +987,7 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1)
if (netif_running(dev))
pcnet32_netif_stop(dev);
netdev_lock(dev);
spin_lock_irqsave(&lp->lock, flags);
lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); /* stop the chip */
@@ -1122,6 +1125,7 @@ clean_up:
lp->a->write_bcr(ioaddr, 20, 4); /* return to 16bit mode */
}
spin_unlock_irqrestore(&lp->lock, flags);
netdev_unlock(dev);
return rc;
} /* end pcnet32_loopback_test */
@@ -2101,6 +2105,7 @@ static int pcnet32_open(struct net_device *dev)
return -EAGAIN;
}
netdev_lock(dev);
spin_lock_irqsave(&lp->lock, flags);
/* Check for a valid station address */
if (!is_valid_ether_addr(dev->dev_addr)) {
@@ -2266,7 +2271,7 @@ static int pcnet32_open(struct net_device *dev)
goto err_free_ring;
}
napi_enable(&lp->napi);
napi_enable_locked(&lp->napi);
/* Re-initialize the PCNET32, and start it when done. */
lp->a->write_csr(ioaddr, 1, (lp->init_dma_addr & 0xffff));
@@ -2300,6 +2305,7 @@ static int pcnet32_open(struct net_device *dev)
lp->a->read_csr(ioaddr, CSR0));
spin_unlock_irqrestore(&lp->lock, flags);
netdev_unlock(dev);
return 0; /* Always succeed */
@@ -2315,6 +2321,7 @@ err_free_ring:
err_free_irq:
spin_unlock_irqrestore(&lp->lock, flags);
netdev_unlock(dev);
free_irq(dev->irq, dev);
return rc;
}
+42 -42
View File
@@ -1180,7 +1180,7 @@ static void iavf_napi_enable_all(struct iavf_adapter *adapter)
q_vector = &adapter->q_vectors[q_idx];
napi = &q_vector->napi;
napi_enable(napi);
napi_enable_locked(napi);
}
}
@@ -1196,7 +1196,7 @@ static void iavf_napi_disable_all(struct iavf_adapter *adapter)
for (q_idx = 0; q_idx < q_vectors; q_idx++) {
q_vector = &adapter->q_vectors[q_idx];
napi_disable(&q_vector->napi);
napi_disable_locked(&q_vector->napi);
}
}
@@ -1800,8 +1800,8 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
q_vector->v_idx = q_idx;
q_vector->reg_idx = q_idx;
cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
netif_napi_add(adapter->netdev, &q_vector->napi,
iavf_napi_poll);
netif_napi_add_locked(adapter->netdev, &q_vector->napi,
iavf_napi_poll);
}
return 0;
@@ -1827,7 +1827,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
netif_napi_del(&q_vector->napi);
netif_napi_del_locked(&q_vector->napi);
}
kfree(adapter->q_vectors);
adapter->q_vectors = NULL;
@@ -1977,7 +1977,7 @@ static void iavf_finish_config(struct work_struct *work)
* The dev->lock is needed to update the queue number
*/
rtnl_lock();
mutex_lock(&adapter->netdev->lock);
netdev_lock(adapter->netdev);
mutex_lock(&adapter->crit_lock);
if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
@@ -1997,7 +1997,7 @@ static void iavf_finish_config(struct work_struct *work)
netif_set_real_num_tx_queues(adapter->netdev, pairs);
if (adapter->netdev->reg_state != NETREG_REGISTERED) {
mutex_unlock(&adapter->netdev->lock);
netdev_unlock(adapter->netdev);
netdev_released = true;
err = register_netdevice(adapter->netdev);
if (err) {
@@ -2027,7 +2027,7 @@ static void iavf_finish_config(struct work_struct *work)
out:
mutex_unlock(&adapter->crit_lock);
if (!netdev_released)
mutex_unlock(&adapter->netdev->lock);
netdev_unlock(adapter->netdev);
rtnl_unlock();
}
@@ -2724,10 +2724,10 @@ static void iavf_watchdog_task(struct work_struct *work)
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
mutex_lock(&netdev->lock);
netdev_lock(netdev);
if (!mutex_trylock(&adapter->crit_lock)) {
if (adapter->state == __IAVF_REMOVE) {
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
}
@@ -2741,35 +2741,35 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_STARTUP:
iavf_startup(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_GET_RESOURCES:
iavf_init_get_resources(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_EXTENDED_CAPS:
iavf_init_process_extended_caps(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_CONFIG_ADAPTER:
iavf_init_config_adapter(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
@@ -2781,7 +2781,7 @@ static void iavf_watchdog_task(struct work_struct *work)
* as it can loop forever
*/
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
}
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
@@ -2790,7 +2790,7 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
iavf_shutdown_adminq(hw);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, (5 * HZ));
return;
@@ -2798,7 +2798,7 @@ static void iavf_watchdog_task(struct work_struct *work)
/* Try again from failed step*/
iavf_change_state(adapter, adapter->last_state);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
return;
case __IAVF_COMM_FAILED:
@@ -2811,7 +2811,7 @@ static void iavf_watchdog_task(struct work_struct *work)
iavf_change_state(adapter, __IAVF_INIT_FAILED);
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
}
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
@@ -2831,14 +2831,14 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task,
msecs_to_jiffies(10));
return;
case __IAVF_RESETTING:
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
HZ * 2);
return;
@@ -2869,7 +2869,7 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_REMOVE:
default:
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
}
@@ -2881,14 +2881,14 @@ static void iavf_watchdog_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, HZ * 2);
return;
}
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
restart_watchdog:
if (adapter->state >= __IAVF_DOWN)
queue_work(adapter->wq, &adapter->adminq_task);
@@ -3015,12 +3015,12 @@ static void iavf_reset_task(struct work_struct *work)
/* When device is being removed it doesn't make sense to run the reset
* task, just return in such a case.
*/
mutex_lock(&netdev->lock);
netdev_lock(netdev);
if (!mutex_trylock(&adapter->crit_lock)) {
if (adapter->state != __IAVF_REMOVE)
queue_work(adapter->wq, &adapter->reset_task);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
}
@@ -3068,7 +3068,7 @@ static void iavf_reset_task(struct work_struct *work)
reg_val);
iavf_disable_vf(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -3209,7 +3209,7 @@ continue_reset:
wake_up(&adapter->reset_waitqueue);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return;
reset_err:
@@ -3220,7 +3220,7 @@ reset_err:
iavf_disable_vf(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
}
@@ -3692,10 +3692,10 @@ exit:
if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
return 0;
mutex_lock(&netdev->lock);
netdev_lock(netdev);
netif_set_real_num_rx_queues(netdev, total_qps);
netif_set_real_num_tx_queues(netdev, total_qps);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return ret;
}
@@ -4365,7 +4365,7 @@ static int iavf_open(struct net_device *netdev)
return -EIO;
}
mutex_lock(&netdev->lock);
netdev_lock(netdev);
while (!mutex_trylock(&adapter->crit_lock)) {
/* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
* is already taken and iavf_open is called from an upper
@@ -4373,7 +4373,7 @@ static int iavf_open(struct net_device *netdev)
* We have to leave here to avoid dead lock.
*/
if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) {
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return -EBUSY;
}
@@ -4424,7 +4424,7 @@ static int iavf_open(struct net_device *netdev)
iavf_irq_enable(adapter, true);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return 0;
@@ -4437,7 +4437,7 @@ err_setup_tx:
iavf_free_all_tx_resources(adapter);
err_unlock:
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return err;
}
@@ -4459,12 +4459,12 @@ static int iavf_close(struct net_device *netdev)
u64 aq_to_restore;
int status;
mutex_lock(&netdev->lock);
netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
if (adapter->state <= __IAVF_DOWN_PENDING) {
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return 0;
}
@@ -4498,7 +4498,7 @@ static int iavf_close(struct net_device *netdev)
iavf_free_traffic_irqs(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
/* We explicitly don't free resources here because the hardware is
* still active and can DMA into memory. Resources are cleared in
@@ -5375,7 +5375,7 @@ static int iavf_suspend(struct device *dev_d)
netif_device_detach(netdev);
mutex_lock(&netdev->lock);
netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
if (netif_running(netdev)) {
@@ -5387,7 +5387,7 @@ static int iavf_suspend(struct device *dev_d)
iavf_reset_interrupt_capability(adapter);
mutex_unlock(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
return 0;
}
@@ -5486,7 +5486,7 @@ static void iavf_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
mutex_lock(&netdev->lock);
netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
dev_info(&adapter->pdev->dev, "Removing device\n");
iavf_change_state(adapter, __IAVF_REMOVE);
@@ -5523,7 +5523,7 @@ static void iavf_remove(struct pci_dev *pdev)
mutex_destroy(&hw->aq.asq_mutex);
mutex_unlock(&adapter->crit_lock);
mutex_destroy(&adapter->crit_lock);
mutex_unlock(&netdev->lock);
netdev_unlock(netdev);
iounmap(hw->hw_addr);
pci_release_regions(pdev);
+4 -1
View File
@@ -4392,6 +4392,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
if (pp->neta_armada3700)
return 0;
netdev_lock(port->napi.dev);
spin_lock(&pp->lock);
/*
* Configuring the driver for a new CPU while the driver is
@@ -4418,7 +4419,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
/* Mask all ethernet port interrupts */
on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
napi_enable(&port->napi);
napi_enable_locked(&port->napi);
/*
* Enable per-CPU interrupts on the CPU that is
@@ -4439,6 +4440,8 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
MVNETA_CAUSE_LINK_CHANGE);
netif_tx_start_all_queues(pp->dev);
spin_unlock(&pp->lock);
netdev_unlock(port->napi.dev);
return 0;
}
+4 -2
View File
@@ -2320,7 +2320,8 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu)
if (ret < 0)
goto out_free_tmp_vptr_1;
napi_disable(&vptr->napi);
netdev_lock(dev);
napi_disable_locked(&vptr->napi);
spin_lock_irqsave(&vptr->lock, flags);
@@ -2342,12 +2343,13 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu)
velocity_give_many_rx_descs(vptr);
napi_enable(&vptr->napi);
napi_enable_locked(&vptr->napi);
mac_enable_int(vptr->mac_regs);
netif_start_queue(dev);
spin_unlock_irqrestore(&vptr->lock, flags);
netdev_unlock(dev);
velocity_free_rings(tmp_vptr);
+2 -2
View File
@@ -108,10 +108,10 @@ nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch)
struct netdevsim *ns = netdev_priv(dev);
int err;
mutex_lock(&dev->lock);
netdev_lock(dev);
err = netif_set_real_num_queues(dev, ch->combined_count,
ch->combined_count);
mutex_unlock(&dev->lock);
netdev_unlock(dev);
if (err)
return err;
+101 -17
View File
@@ -382,8 +382,9 @@ struct napi_struct {
struct sk_buff *skb;
struct list_head rx_list; /* Pending GRO_NORMAL skbs */
int rx_count; /* length of rx_list */
unsigned int napi_id;
unsigned int napi_id; /* protected by netdev_lock */
struct hrtimer timer;
/* all fields past this point are write-protected by netdev_lock */
struct task_struct *thread;
unsigned long gro_flush_timeout;
unsigned long irq_suspend_timeout;
@@ -570,16 +571,11 @@ static inline bool napi_complete(struct napi_struct *n)
int dev_set_threaded(struct net_device *dev, bool threaded);
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
*
* Stop NAPI from being scheduled on this context.
* Waits till any outstanding processing completes.
*/
void napi_disable(struct napi_struct *n);
void napi_disable_locked(struct napi_struct *n);
void napi_enable(struct napi_struct *n);
void napi_enable_locked(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
@@ -2444,8 +2440,27 @@ struct net_device {
u32 napi_defer_hard_irqs;
/**
* @lock: protects @net_shaper_hierarchy, feel free to use for other
* netdev-scope protection. Ordering: take after rtnl_lock.
* @up: copy of @state's IFF_UP, but safe to read with just @lock.
* May report false negatives while the device is being opened
* or closed (@lock does not protect .ndo_open, or .ndo_close).
*/
bool up;
/**
* @lock: netdev-scope lock, protects a small selection of fields.
* Should always be taken using netdev_lock() / netdev_unlock() helpers.
* Drivers are free to use it for other protection.
*
* Protects:
* @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list,
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Partially protects (writers must hold both @lock and rtnl_lock):
* @up
*
* Also protects some fields in struct napi_struct.
*
* Ordering: take after rtnl_lock.
*/
struct mutex lock;
@@ -2671,18 +2686,58 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
enum netdev_queue_type type,
struct napi_struct *napi);
static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
/* Acquire the per-device instance lock, i.e. the dev->lock mutex. */
static inline void netdev_lock(struct net_device *dev)
{
mutex_lock(&dev->lock);
}
/* Release the per-device instance lock, i.e. the dev->lock mutex. */
static inline void netdev_unlock(struct net_device *dev)
{
mutex_unlock(&dev->lock);
}
/* Lockdep assertion that the caller currently holds dev->lock. */
static inline void netdev_assert_locked(struct net_device *dev)
{
lockdep_assert_held(&dev->lock);
}
/*
 * Assert that dev->lock is held, but only while @dev is in the
 * NETREG_REGISTERED or NETREG_UNREGISTERING state. In any other
 * registration state no assertion is made.
 * NOTE(review): "invisible" presumably means the device cannot yet (or can
 * no longer) be reached by other contexts in those states - confirm.
 */
static inline void netdev_assert_locked_or_invisible(struct net_device *dev)
{
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING)
netdev_assert_locked(dev);
}
/*
 * Record @irq in @napi. This variant takes no lock itself and performs no
 * lock assertion; going by the naming, the caller is expected to already
 * hold the netdev lock of napi->dev.
 */
static inline void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
{
napi->irq = irq;
}
/*
 * Record @irq in @napi, taking the netdev lock of napi->dev around the
 * update. Must not be called with that lock already held.
 */
static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
{
netdev_lock(napi->dev);
netif_napi_set_irq_locked(napi, irq);
netdev_unlock(napi->dev);
}
/* Default NAPI poll() weight
* Device drivers are strongly advised to not use bigger value
*/
#define NAPI_POLL_WEIGHT 64
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight);
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
int weight);
/*
 * Locking wrapper around netif_napi_add_weight_locked(): takes @dev's
 * instance lock, registers @napi with the given @poll callback and @weight,
 * then drops the lock. Must not be called with the lock already held.
 */
static inline void
netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
netdev_lock(dev);
netif_napi_add_weight_locked(dev, napi, poll, weight);
netdev_unlock(dev);
}
/**
* netif_napi_add() - initialize a NAPI context
@@ -2700,6 +2755,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi,
netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
/*
 * Same as netif_napi_add(), i.e. registers @napi with the default
 * NAPI_POLL_WEIGHT, but for callers that already hold @dev's instance lock
 * (netif_napi_add_weight_locked() asserts that it is held).
 */
static inline void
netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int))
{
netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
}
static inline void
netif_napi_add_tx_weight(struct net_device *dev,
struct napi_struct *napi,
@@ -2710,6 +2772,15 @@ netif_napi_add_tx_weight(struct net_device *dev,
netif_napi_add_weight(dev, napi, poll, weight);
}
/*
 * Variant of netif_napi_add_config() for callers that already hold @dev's
 * instance lock: binds @napi to slot @index of dev->napi_config and then
 * registers it with the default NAPI_POLL_WEIGHT.
 * NOTE(review): @index is not range-checked here; it is assumed to be a
 * valid index into dev->napi_config - confirm against callers.
 */
static inline void
netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int index)
{
/* index/config are set before the add call, which runs with them in place */
napi->index = index;
napi->config = &dev->napi_config[index];
netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
}
/**
* netif_napi_add_config - initialize a NAPI context with persistent config
* @dev: network device
@@ -2721,9 +2792,9 @@ static inline void
netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int index)
{
napi->index = index;
napi->config = &dev->napi_config[index];
netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
netdev_lock(dev);
netif_napi_add_config_locked(dev, napi, poll, index);
netdev_unlock(dev);
}
/**
@@ -2743,6 +2814,8 @@ static inline void netif_napi_add_tx(struct net_device *dev,
netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
void __netif_napi_del_locked(struct napi_struct *napi);
/**
* __netif_napi_del - remove a NAPI context
* @napi: NAPI context
@@ -2751,7 +2824,18 @@ static inline void netif_napi_add_tx(struct net_device *dev,
* containing @napi. Drivers might want to call this helper to combine
* all the needed RCU grace periods into a single one.
*/
void __netif_napi_del(struct napi_struct *napi);
static inline void __netif_napi_del(struct napi_struct *napi)
{
/* Locking wrapper: the removal itself runs under the lock of napi->dev. */
netdev_lock(napi->dev);
__netif_napi_del_locked(napi);
netdev_unlock(napi->dev);
}
/*
 * Remove @napi for callers that already hold the netdev lock of napi->dev
 * (__netif_napi_del_locked() asserts it), then call synchronize_net().
 * NOTE(review): the synchronize_net() presumably provides the RCU grace
 * period needed before the memory containing @napi may be freed - confirm.
 */
static inline void netif_napi_del_locked(struct napi_struct *napi)
{
__netif_napi_del_locked(napi);
synchronize_net();
}
/**
* netif_napi_del - remove a NAPI context
+167 -16
View File
@@ -768,7 +768,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
}
/* must be called under rcu_read_lock(), as we don't take a reference */
struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
static struct napi_struct *
netdev_napi_by_id(struct net *net, unsigned int napi_id)
{
struct napi_struct *napi;
@@ -784,6 +785,49 @@ struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
return napi;
}
/**
* netdev_napi_by_id_lock() - find a device by NAPI ID and lock it
* @net: the applicable net namespace
* @napi_id: ID of a NAPI of a target device
*
* Find a NAPI instance with @napi_id. Lock its device.
* The device must be in %NETREG_REGISTERED state for lookup to succeed.
* netdev_unlock() must be called to release it.
*
* Return: pointer to the NAPI, with its device's lock held; NULL if not found.
*/
struct napi_struct *
netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
{
struct napi_struct *napi;
struct net_device *dev;
/* First pass: lockless lookup under RCU, only to find the owning device. */
rcu_read_lock();
napi = netdev_napi_by_id(net, napi_id);
if (!napi || READ_ONCE(napi->dev->reg_state) != NETREG_REGISTERED) {
rcu_read_unlock();
return NULL;
}
dev = napi->dev;
/* Take a reference so @dev can still be used after leaving the RCU section. */
dev_hold(dev);
rcu_read_unlock();
/*
 * Trade the reference for the instance lock. The reference is dropped either
 * way; NULL comes back if the device is already past NETREG_REGISTERED.
 */
dev = __netdev_put_lock(dev);
if (!dev)
return NULL;
/*
 * Second pass, now with the lock held: the result of the first lookup was
 * not kept across the unlocked window, so look the ID up again and reject
 * it if it is gone or no longer belongs to the device we locked.
 */
rcu_read_lock();
napi = netdev_napi_by_id(net, napi_id);
if (napi && napi->dev != dev)
napi = NULL;
rcu_read_unlock();
/* On failure the caller gets NULL and must not be left holding the lock. */
if (!napi)
netdev_unlock(dev);
return napi;
}
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
@@ -972,6 +1016,73 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
return napi ? napi->dev : NULL;
}
/* Trade a held reference on @dev for the device's instance lock.
 *
 * The caller's reference is always released, whatever the outcome. If the
 * device is still registered (reg_state not past NETREG_REGISTERED) it is
 * returned with its instance lock held. If it is being unregistered the lock
 * is dropped again and NULL is returned.
 *
 * Meant to be used right after a lockless lookup helper returned a
 * referenced net_device. Holding the lock prevents the instance from going
 * away.
 */
struct net_device *__netdev_put_lock(struct net_device *dev)
{
	netdev_lock(dev);

	if (dev->reg_state <= NETREG_REGISTERED) {
		/* Still usable: keep the lock, drop only the reference. */
		dev_put(dev);
		return dev;
	}

	/* Unregistration in progress: give up both lock and reference. */
	netdev_unlock(dev);
	dev_put(dev);
	return NULL;
}
/**
 * netdev_get_by_index_lock() - look up a device by ifindex and lock it
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Look an interface up by its index. When a valid device with @ifindex
 * exists it is handed back with netdev->lock held; the caller releases it
 * with netdev_unlock().
 *
 * Return: pointer to a device with lock held, NULL if not found.
 */
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
{
	struct net_device *dev = dev_get_by_index(net, ifindex);

	/* The lookup result is converted from "referenced" to "locked". */
	return dev ? __netdev_put_lock(dev) : NULL;
}
/*
 * Iteration step for walking net->dev_by_index with the instance lock held.
 *
 * Unlocks @dev (the device returned by the previous step, or NULL on the
 * first step), then searches for the next present entry at or after *@index.
 * Entries that cannot be locked because they are being unregistered are
 * skipped by advancing *@index. Returns the next device with its lock held,
 * or NULL when the search finds no further entry.
 */
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
		    unsigned long *index)
{
	if (dev)
		netdev_unlock(dev);

	for (;;) {
		struct net_device *found;

		/* Pin the candidate before leaving the RCU section. */
		rcu_read_lock();
		found = xa_find(&net->dev_by_index, index, ULONG_MAX,
				XA_PRESENT);
		if (found)
			dev_hold(found);
		rcu_read_unlock();

		if (!found)
			return NULL;

		/* Swap the reference for the lock; NULL means "going away". */
		found = __netdev_put_lock(found);
		if (found)
			return found;

		(*index)++;
	}
}
static DEFINE_SEQLOCK(netdev_rename_lock);
void netdev_copy_name(struct net_device *dev, char *name)
@@ -1509,7 +1620,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
else {
dev->flags |= IFF_UP;
netif_set_up(dev, true);
dev_set_rx_mode(dev);
dev_activate(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -1588,7 +1699,7 @@ static void __dev_close_many(struct list_head *head)
if (ops->ndo_stop)
ops->ndo_stop(dev);
dev->flags &= ~IFF_UP;
netif_set_up(dev, false);
netpoll_poll_enable(dev);
}
}
@@ -6674,6 +6785,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
struct napi_struct *napi;
int err = 0;
netdev_assert_locked_or_invisible(dev);
if (dev->threaded == threaded)
return 0;
@@ -6800,9 +6913,12 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
int weight)
{
netdev_assert_locked(dev);
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6841,15 +6957,17 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = false;
netif_napi_set_irq(napi, -1);
netif_napi_set_irq_locked(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
EXPORT_SYMBOL(netif_napi_add_weight_locked);
void napi_disable(struct napi_struct *n)
void napi_disable_locked(struct napi_struct *n)
{
unsigned long val, new;
might_sleep();
netdev_assert_locked(n->dev);
set_bit(NAPI_STATE_DISABLE, &n->state);
val = READ_ONCE(n->state);
@@ -6872,16 +6990,25 @@ void napi_disable(struct napi_struct *n)
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
EXPORT_SYMBOL(napi_disable);
EXPORT_SYMBOL(napi_disable_locked);
/**
* napi_enable - enable NAPI scheduling
* @n: NAPI context
* napi_disable() - prevent NAPI from scheduling
* @n: NAPI context
*
* Resume NAPI from being scheduled on this context.
* Must be paired with napi_disable.
* Stop NAPI from being scheduled on this context.
* Waits till any outstanding processing completes.
* Takes netdev_lock() for associated net_device.
*/
void napi_enable(struct napi_struct *n)
void napi_disable(struct napi_struct *n)
{
/* Locking wrapper: napi_disable_locked() asserts that n->dev's lock is held. */
netdev_lock(n->dev);
napi_disable_locked(n);
netdev_unlock(n->dev);
}
EXPORT_SYMBOL(napi_disable);
void napi_enable_locked(struct napi_struct *n)
{
unsigned long new, val = READ_ONCE(n->state);
@@ -6898,6 +7025,22 @@ void napi_enable(struct napi_struct *n)
new |= NAPIF_STATE_THREADED;
} while (!try_cmpxchg(&n->state, &val, new));
}
EXPORT_SYMBOL(napi_enable_locked);
/**
* napi_enable() - enable NAPI scheduling
* @n: NAPI context
*
* Enable scheduling of a NAPI instance.
* Must be paired with napi_disable().
* Takes netdev_lock() for associated net_device.
*/
void napi_enable(struct napi_struct *n)
{
/* Locking wrapper: runs napi_enable_locked() under the lock of n->dev. */
netdev_lock(n->dev);
napi_enable_locked(n);
netdev_unlock(n->dev);
}
EXPORT_SYMBOL(napi_enable);
static void flush_gro_hash(struct napi_struct *napi)
@@ -6914,8 +7057,10 @@ static void flush_gro_hash(struct napi_struct *napi)
}
/* Must be called in process context */
void __netif_napi_del(struct napi_struct *napi)
void __netif_napi_del_locked(struct napi_struct *napi)
{
netdev_assert_locked(napi->dev);
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
@@ -6935,7 +7080,7 @@ void __netif_napi_del(struct napi_struct *napi)
napi->thread = NULL;
}
}
EXPORT_SYMBOL(__netif_napi_del);
EXPORT_SYMBOL(__netif_napi_del_locked);
static int __napi_poll(struct napi_struct *n, bool *repoll)
{
@@ -10695,7 +10840,9 @@ int register_netdevice(struct net_device *dev)
ret = netdev_register_kobject(dev);
netdev_lock(dev);
WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
netdev_unlock(dev);
if (ret)
goto err_uninit_notify;
@@ -10969,7 +11116,9 @@ void netdev_run_todo(void)
continue;
}
netdev_lock(dev);
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
netdev_unlock(dev);
linkwatch_sync_dev(dev);
}
@@ -11575,7 +11724,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
unlist_netdevice(dev);
netdev_lock(dev);
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
netdev_unlock(dev);
}
flush_all_backlogs();
+28 -1
View File
@@ -2,6 +2,7 @@
#ifndef _NET_CORE_DEV_H
#define _NET_CORE_DEV_H
#include <linux/cleanup.h>
#include <linux/types.h>
#include <linux/rwsem.h>
#include <linux/netdevice.h>
@@ -22,9 +23,23 @@ struct sd_flow_limit {
extern int netdev_flow_limit_table_len;
struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
struct napi_struct *
netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
struct net_device *__netdev_put_lock(struct net_device *dev);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
/* Scope-exit handler: unlock the device if the pointer is non-NULL. */
DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T));
/*
 * Iterate over the devices of @net, holding the instance lock of the current
 * device for the duration of each loop body.
 *
 * @var_name is declared by the macro itself. Each step passes the previous
 * device to netdev_xa_find_lock(), which unlocks it and returns the next one
 * locked. @ifindex must be an lvalue: it is both the search cursor and
 * incremented after every iteration. Because @var_name carries the
 * netdev_unlock cleanup handler, leaving the loop early still unlocks the
 * current device.
 */
#define for_each_netdev_lock_scoped(net, var_name, ifindex) \
for (struct net_device *var_name __free(netdev_unlock) = NULL; \
(var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \
ifindex++)
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
@@ -112,6 +127,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh);
/*
 * Set or clear the "up" state of @dev.
 *
 * IFF_UP in dev->flags is updated first, outside the instance lock; the
 * dev->up copy is then written with the instance lock held.
 */
static inline void netif_set_up(struct net_device *dev, bool value)
{
	if (!value)
		dev->flags &= ~IFF_UP;
	else
		dev->flags |= IFF_UP;

	/* Only the ->up mirror is written under the instance lock. */
	netdev_lock(dev);
	dev->up = value;
	netdev_unlock(dev);
}
static inline void netif_set_gso_max_size(struct net_device *dev,
unsigned int size)
{
+35 -4
View File
@@ -36,7 +36,7 @@ static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
/* Caller holds RTNL or RCU */
/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
@@ -108,6 +108,36 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return ret;
}
/*
 * Counterpart of netdev_store() that serializes on netdev_lock() instead of
 * rtnl_lock().
 *
 * Requires CAP_NET_ADMIN in the user namespace of the device's netns, parses
 * @buf as an unsigned long and, if the device is still alive, hands the value
 * to @set while holding the instance lock.
 *
 * Returns -EPERM without the capability, the kstrtoul() error on a parse
 * failure, the error from @set if it fails, and @len when @set succeeds.
 * If the device is no longer alive, @set is not called and the (zero)
 * parse result is returned.
 */
static ssize_t
netdev_lock_store(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t len,
		  int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	unsigned long val;
	int rc;

	if (!ns_capable(dev_net(netdev)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return rc;

	netdev_lock(netdev);
	if (dev_isalive(netdev)) {
		rc = set(netdev, val);
		if (!rc)
			rc = len;
	}
	netdev_unlock(netdev);

	return rc;
}
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
@@ -420,7 +450,7 @@ static ssize_t gro_flush_timeout_store(struct device *dev,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
return netdev_store(dev, attr, buf, len, change_gro_flush_timeout);
return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
@@ -440,7 +470,8 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
return netdev_lock_store(dev, attr, buf, len,
change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
@@ -638,7 +669,7 @@ static ssize_t threaded_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, modify_napi_threaded);
return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);
+25 -31
View File
@@ -167,7 +167,7 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
void *hdr;
pid_t pid;
if (!(napi->dev->flags & IFF_UP))
if (!napi->dev->up)
return 0;
hdr = genlmsg_iput(rsp, info);
@@ -229,20 +229,15 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
if (!rsp)
return -ENOMEM;
rtnl_lock();
rcu_read_lock();
napi = netdev_napi_by_id(genl_info_net(info), napi_id);
napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
rcu_read_unlock();
rtnl_unlock();
if (err) {
goto err_free_msg;
} else if (!rsp->len) {
@@ -266,7 +261,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
unsigned int prev_id;
int err = 0;
if (!(netdev->flags & IFF_UP))
if (!netdev->up)
return err;
prev_id = UINT_MAX;
@@ -301,22 +296,22 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (info->attrs[NETDEV_A_NAPI_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
rtnl_lock();
if (ifindex) {
netdev = __dev_get_by_index(net, ifindex);
if (netdev)
netdev = netdev_get_by_index_lock(net, ifindex);
if (netdev) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
else
netdev_unlock(netdev);
} else {
err = -ENODEV;
}
} else {
for_each_netdev_dump(net, netdev, ctx->ifindex) {
for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
ctx->napi_id = 0;
}
}
rtnl_unlock();
return err;
}
@@ -357,20 +352,15 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
rtnl_lock();
rcu_read_lock();
napi = netdev_napi_by_id(genl_info_net(info), napi_id);
napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
rcu_read_unlock();
rtnl_unlock();
return err;
}
@@ -442,7 +432,7 @@ netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
{
int err;
if (!(netdev->flags & IFF_UP))
if (!netdev->up)
return -ENOENT;
err = netdev_nl_queue_validate(netdev, q_idx, q_type);
@@ -474,11 +464,13 @@ int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
netdev = __dev_get_by_index(genl_info_net(info), ifindex);
if (netdev)
netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
if (netdev) {
err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
else
netdev_unlock(netdev);
} else {
err = -ENODEV;
}
rtnl_unlock();
@@ -499,7 +491,7 @@ netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
{
int err = 0;
if (!(netdev->flags & IFF_UP))
if (!netdev->up)
return err;
for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
@@ -532,13 +524,15 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
if (ifindex) {
netdev = __dev_get_by_index(net, ifindex);
if (netdev)
netdev = netdev_get_by_index_lock(net, ifindex);
if (netdev) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
else
netdev_unlock(netdev);
} else {
err = -ENODEV;
}
} else {
for_each_netdev_dump(net, netdev, ctx->ifindex) {
for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
+3 -3
View File
@@ -40,7 +40,7 @@ static void net_shaper_lock(struct net_shaper_binding *binding)
{
switch (binding->type) {
case NET_SHAPER_BINDING_TYPE_NETDEV:
mutex_lock(&binding->netdev->lock);
netdev_lock(binding->netdev);
break;
}
}
@@ -49,7 +49,7 @@ static void net_shaper_unlock(struct net_shaper_binding *binding)
{
switch (binding->type) {
case NET_SHAPER_BINDING_TYPE_NETDEV:
mutex_unlock(&binding->netdev->lock);
netdev_unlock(binding->netdev);
break;
}
}
@@ -1398,7 +1398,7 @@ void net_shaper_set_real_num_tx_queues(struct net_device *dev,
/* Only drivers implementing shapers support ensure
* the lock is acquired in advance.
*/
lockdep_assert_held(&dev->lock);
netdev_assert_locked(dev);
/* Take action only when decreasing the tx queue number. */
for (i = txq; i < dev->real_num_tx_queues; ++i) {