Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
ice: add support for devlink health events

Przemek Kitszel says:

Reports for two kinds of events are implemented, Malicious Driver
Detection (MDD) and Tx hang.

Patches 1, 2, 3: core improvements (checkpatch.pl, devlink extension)
Patch 4: rename current ice devlink/ files
Patches 5, 6, 7: ice devlink health infra + reporters

Mateusz did a good job caring for this series and hardening the code.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  ice: Add MDD logging via devlink health
  ice: add Tx hang devlink health reporter
  ice: rename devlink_port.[ch] to port.[ch]
  devlink: add devlink_fmsg_dump_skb() function
  devlink: add devlink_fmsg_put() macro
  checkpatch: don't complain on _Generic() use
====================

Link: https://patch.msgid.link/20241217210835.3702003-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-12-18 19:17:08 -08:00
commit 44d49629bf
14 changed files with 438 additions and 12 deletions

@ -32,7 +32,8 @@ ice-y := ice_main.o \
ice_parser_rt.o \
ice_idc.o \
devlink/devlink.o \
devlink/devlink_port.o \
devlink/health.o \
devlink/port.o \
ice_sf_eth.o \
ice_sf_vsi_vlan_ops.o \
ice_ddp.o \

@ -6,7 +6,7 @@
#include "ice.h"
#include "ice_lib.h"
#include "devlink.h"
#include "devlink_port.h"
#include "port.h"
#include "ice_eswitch.h"
#include "ice_fw_update.h"
#include "ice_dcb_lib.h"

@ -0,0 +1,269 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024, Intel Corporation. */
#include "health.h"
#include "ice.h"
/* Put the member @name of @obj into @fmsg as a name/value pair; the pair's
 * name is the stringified member name, so it always matches the field.
 */
#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \
devlink_fmsg_put(fmsg, #name, (obj)->name)
/**
 * ice_devlink_health_report - forward an event to a devlink health reporter
 *
 * @reporter: reporter to notify; a NULL reporter turns the call into a no-op
 * @msg: short message to pass up, the event name is sufficient
 * @priv_ctx: event-specific context for the reporter, typically an event
 *	      struct
 */
static void ice_devlink_health_report(struct devlink_health_reporter *reporter,
				      const char *msg, void *priv_ctx)
{
	/* Auto recovery is not used by this driver, so the function below
	 * always returns 0 and its result is intentionally discarded.
	 */
	if (reporter)
		devlink_health_report(reporter, msg, priv_ctx);
}
/**
 * struct ice_mdd_event - context passed to the MDD health reporter
 * @src: HW block that was the source of the MDD event
 * @vf_num: vf_num on which the MDD event occurred
 * @queue: queue on which the MDD event occurred
 * @pf_num: pf_num on which the MDD event occurred
 * @event: event type of the MDD event
 */
struct ice_mdd_event {
enum ice_mdd_src src;
u16 vf_num;
u16 queue;
u8 pf_num;
u8 event;
};
/**
 * ice_mdd_src_to_str - translate an MDD source into its printable name
 * @src: source of the MDD event
 *
 * Return: a constant string naming @src, or "invalid" for any value that is
 * not one of the known sources.
 */
static const char *ice_mdd_src_to_str(enum ice_mdd_src src)
{
	const char *name = "invalid";

	switch (src) {
	case ICE_MDD_SRC_TX_PQM:
		name = "tx_pqm";
		break;
	case ICE_MDD_SRC_TX_TCLAN:
		name = "tx_tclan";
		break;
	case ICE_MDD_SRC_TX_TDPU:
		name = "tx_tdpu";
		break;
	case ICE_MDD_SRC_RX:
		name = "rx";
		break;
	default:
		break;
	}

	return name;
}
/**
 * ice_mdd_reporter_dump - dump callback of the MDD devlink health reporter
 * @reporter: reporter the dump is requested for (not used here)
 * @fmsg: devlink formatted message to fill in
 * @priv_ctx: struct ice_mdd_event describing the event; nothing is emitted
 *	      when it is NULL
 * @extack: netlink extended ack (not used here)
 *
 * Return: always 0.
 */
static int
ice_mdd_reporter_dump(struct devlink_health_reporter *reporter,
		      struct devlink_fmsg *fmsg, void *priv_ctx,
		      struct netlink_ext_ack *extack)
{
	struct ice_mdd_event *ev = priv_ctx;

	if (ev) {
		devlink_fmsg_obj_nest_start(fmsg);
		devlink_fmsg_put(fmsg, "src", ice_mdd_src_to_str(ev->src));
		ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, ev, pf_num);
		ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, ev, vf_num);
		ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, ev, event);
		ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, ev, queue);
		devlink_fmsg_obj_nest_end(fmsg);
	}

	return 0;
}
/**
 * ice_report_mdd_event - hand an MDD event over to devlink health
 * @pf: the PF device structure
 * @src: HW block the MDD event originated from
 * @pf_num: pf_num the MDD event occurred on
 * @vf_num: vf_num the MDD event occurred on
 * @event: type of the MDD event
 * @queue: queue the MDD event occurred on
 *
 * Packs the arguments into an event structure and reports it through the
 * MDD health reporter of this PF.
 */
void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num,
			  u16 vf_num, u8 event, u16 queue)
{
	struct devlink_health_reporter *mdd_rep = pf->health_reporters.mdd;
	struct ice_mdd_event mdd_event = {
		.queue = queue,
		.event = event,
		.vf_num = vf_num,
		.pf_num = pf_num,
		.src = src,
	};

	ice_devlink_health_report(mdd_rep, "MDD event", &mdd_event);
}
/**
 * ice_fmsg_put_ptr - put a pointer, formatted with %p, into fmsg as a string
 *
 * @fmsg: devlink fmsg under construction
 * @name: name of the pair to emit
 * @ptr: pointer value to format and put into fmsg
 */
static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name,
			     void *ptr)
{
	/* Three characters per pointer byte: room for the textual form plus
	 * the terminating NUL.
	 */
	char buf[sizeof(ptr) * 3];

	/* Bounded formatting: the output can never run past the buffer,
	 * whatever textual form %p ends up producing.
	 */
	snprintf(buf, sizeof(buf), "%p", ptr);
	devlink_fmsg_put(fmsg, name, buf);
}
/**
 * struct ice_tx_hang_event - context passed to the Tx hang health reporter
 * @head: descriptor head value captured for the hung queue
 * @intr: interrupt register value captured for the hung queue
 * @vsi_num: VSI owning the queue that the hang occurred on
 * @queue: q_index of the ring that the hang occurred on
 * @next_to_clean: next_to_clean of the ring, sampled when reporting
 * @next_to_use: next_to_use of the ring, sampled when reporting
 * @tx_ring: ring that the hang occurred on
 */
struct ice_tx_hang_event {
u32 head;
u32 intr;
u16 vsi_num;
u16 queue;
u16 next_to_clean;
u16 next_to_use;
struct ice_tx_ring *tx_ring;
};
static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter,
struct devlink_fmsg *fmsg, void *priv_ctx,
struct netlink_ext_ack *extack)
{
struct ice_tx_hang_event *event = priv_ctx;
struct sk_buff *skb;
if (!event)
return 0;
skb = event->tx_ring->tx_buf->skb;
devlink_fmsg_obj_nest_start(fmsg);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean);
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use);
devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name);
ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc);
ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma);
ice_fmsg_put_ptr(fmsg, "skb-ptr", skb);
devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc,
event->tx_ring->count * sizeof(struct ice_tx_desc));
devlink_fmsg_dump_skb(fmsg, skb);
devlink_fmsg_obj_nest_end(fmsg);
return 0;
}
/**
 * ice_prep_tx_hang_report - stash Tx hang data for a later report
 * @pf: the PF device structure
 * @tx_ring: ring that the hang occurred on
 * @vsi_num: VSI owning the queue that the hang occurred on
 * @head: descriptor head value
 * @intr: interrupt register value
 *
 * Only records the values in the PF's pre-allocated Tx hang buffer; the
 * report itself is issued by ice_report_tx_hang().
 */
void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
			     u16 vsi_num, u32 head, u32 intr)
{
	struct ice_health *reps = &pf->health_reporters;

	reps->tx_hang_buf.tx_ring = tx_ring;
	reps->tx_hang_buf.vsi_num = vsi_num;
	reps->tx_hang_buf.head = head;
	reps->tx_hang_buf.intr = intr;
}
void ice_report_tx_hang(struct ice_pf *pf)
{
struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
struct ice_tx_ring *tx_ring = buf->tx_ring;
struct ice_tx_hang_event ev = {
.head = buf->head,
.intr = buf->intr,
.vsi_num = buf->vsi_num,
.queue = tx_ring->q_index,
.next_to_clean = tx_ring->next_to_clean,
.next_to_use = tx_ring->next_to_use,
.tx_ring = tx_ring,
};
ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev);
}
/**
 * ice_init_devlink_rep - create a single devlink health reporter
 * @pf: PF struct, also used as the reporter's private data
 * @ops: operations (including the name) of the reporter to create
 *
 * Failure to create a reporter is logged but not treated as fatal.
 *
 * Return: the new reporter, or NULL when creation failed.
 */
static struct devlink_health_reporter *
ice_init_devlink_rep(struct ice_pf *pf,
		     const struct devlink_health_reporter_ops *ops)
{
	struct devlink *devlink = priv_to_devlink(pf);
	struct devlink_health_reporter *rep;
	const u64 graceful_period = 0;

	rep = devl_health_reporter_create(devlink, ops, graceful_period, pf);
	if (IS_ERR(rep)) {
		struct device *dev = ice_pf_to_dev(pf);

		/* %pe prints the error encoded in the ERR_PTR */
		dev_err(dev, "failed to create devlink %s health reporter: %pe\n",
			ops->name, rep);
		return NULL;
	}

	return rep;
}
/* Define ice_<_name>_reporter_ops: a devlink health reporter ops structure
 * whose name is the stringified @_name and whose dump callback is
 * ice_<_name>_reporter_dump(), which must already be declared.
 */
#define ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \
static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \
.name = #_name, \
.dump = ice_ ## _name ## _reporter_dump, \
}
/* Ops of the two reporters implemented in this file */
ICE_DEFINE_HEALTH_REPORTER_OPS(mdd);
ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang);
/**
 * ice_health_init - create all ice devlink health reporters
 *
 * @pf: PF struct
 *
 * A reporter that could not be created is stored as NULL.
 */
void ice_health_init(struct ice_pf *pf)
{
	pf->health_reporters.mdd =
		ice_init_devlink_rep(pf, &ice_mdd_reporter_ops);
	pf->health_reporters.tx_hang =
		ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops);
}
/**
 * ice_deinit_devl_reporter - destroy a devlink health reporter, if any
 * @reporter: reporter to destroy; NULL is accepted and ignored
 */
static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter)
{
	if (!reporter)
		return;

	devl_health_reporter_destroy(reporter);
}
/**
* ice_health_deinit - deallocate all ice devlink health reporters and
* accompanied data
*
* @pf: PF struct
*/
void ice_health_deinit(struct ice_pf *pf)
{
ice_deinit_devl_reporter(pf->health_reporters.mdd);
ice_deinit_devl_reporter(pf->health_reporters.tx_hang);
}
/**
 * ice_health_assign_healthy_state - mark a health reporter as healthy
 * @reporter: reporter to update; NULL is accepted and ignored
 */
static
void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter)
{
	if (!reporter)
		return;

	devlink_health_reporter_state_update(reporter,
					     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
}
/**
* ice_health_clear - clear devlink health issues after a reset
* @pf: the PF device structure
*
* Mark the PF in healthy state again after a reset has completed.
*/
void ice_health_clear(struct ice_pf *pf)
{
ice_health_assign_healthy_state(pf->health_reporters.mdd);
ice_health_assign_healthy_state(pf->health_reporters.tx_hang);
}

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2024, Intel Corporation. */
#ifndef _HEALTH_H_
#define _HEALTH_H_
#include <linux/types.h>
/**
* DOC: health.h
*
* This header file stores everything that is needed for broadly understood
* devlink health mechanism for ice driver.
*/
struct ice_pf;
struct ice_tx_ring;
/* HW block that was the source of a Malicious Driver Detection (MDD) event */
enum ice_mdd_src {
ICE_MDD_SRC_TX_PQM,
ICE_MDD_SRC_TX_TCLAN,
ICE_MDD_SRC_TX_TDPU,
ICE_MDD_SRC_RX,
};
/**
 * struct ice_health - stores ice devlink health reporters and accompanied data
 * @mdd: devlink health reporter for MDD detection event
 * @tx_hang: devlink health reporter for tx_hang event
 * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from
 *		 non-sleeping context
 * @tx_ring: ring that the hang occurred on; the service task reports the
 *	     hang when this is non-NULL and resets it to NULL afterwards
 * @head: descriptor head
 * @intr: interrupt register value
 * @vsi_num: VSI owning the queue that the hang occurred on
 */
struct ice_health {
struct devlink_health_reporter *mdd;
struct devlink_health_reporter *tx_hang;
struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf,
struct ice_tx_ring *tx_ring;
u32 head;
u32 intr;
u16 vsi_num;
);
};
void ice_health_init(struct ice_pf *pf);
void ice_health_deinit(struct ice_pf *pf);
void ice_health_clear(struct ice_pf *pf);
void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
u16 vsi_num, u32 head, u32 intr);
void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num,
u16 vf_num, u8 event, u16 queue);
void ice_report_tx_hang(struct ice_pf *pf);
#endif /* _HEALTH_H_ */

@ -5,7 +5,7 @@
#include "ice.h"
#include "devlink.h"
#include "devlink_port.h"
#include "port.h"
#include "ice_lib.h"
#include "ice_fltr.h"

@ -78,6 +78,7 @@
#include "ice_irq.h"
#include "ice_dpll.h"
#include "ice_adapter.h"
#include "devlink/health.h"
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
@ -665,6 +666,7 @@ struct ice_pf {
struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
struct ice_dplls dplls;
struct device *hwmon_dev;
struct ice_health health_reporters;
u8 num_quanta_prof_used;
};

@ -5,7 +5,7 @@
#define _ICE_ESWITCH_H_
#include <net/devlink.h>
#include "devlink/devlink_port.h"
#include "devlink/port.h"
#ifdef CONFIG_ICE_SWITCHDEV
void ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf);

@ -14,7 +14,7 @@
#include "ice_dcb_lib.h"
#include "ice_dcb_nl.h"
#include "devlink/devlink.h"
#include "devlink/devlink_port.h"
#include "devlink/port.h"
#include "ice_sf_eth.h"
#include "ice_hwmon.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
@ -1816,6 +1816,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (netif_msg_tx_err(pf))
dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num,
event, queue);
wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
}
@ -1829,6 +1831,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (netif_msg_tx_err(pf))
dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num,
event, queue);
wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
}
@ -1842,6 +1846,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (netif_msg_rx_err(pf))
dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event,
queue);
wr32(hw, GL_MDET_RX, 0xffffffff);
}
@ -2364,9 +2370,11 @@ static void ice_service_task(struct work_struct *work)
struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
unsigned long start_time = jiffies;
/* subtasks */
if (pf->health_reporters.tx_hang_buf.tx_ring) {
ice_report_tx_hang(pf);
pf->health_reporters.tx_hang_buf.tx_ring = NULL;
}
/* process reset requests first */
ice_reset_subtask(pf);
/* bail if a reset/recovery cycle is pending or rebuild failed */
@ -5087,6 +5095,7 @@ static int ice_init_devlink(struct ice_pf *pf)
return err;
ice_devlink_init_regions(pf);
ice_health_init(pf);
ice_devlink_register(pf);
return 0;
@ -5095,6 +5104,7 @@ static int ice_init_devlink(struct ice_pf *pf)
static void ice_deinit_devlink(struct ice_pf *pf)
{
ice_devlink_unregister(pf);
ice_health_deinit(pf);
ice_devlink_destroy_regions(pf);
ice_devlink_unregister_params(pf);
}
@ -7793,6 +7803,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
/* if we get here, reset flow is successful */
clear_bit(ICE_RESET_FAILED, pf->state);
ice_health_clear(pf);
ice_plug_aux_dev(pf);
if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
ice_lag_rebuild(pf);
@ -8283,16 +8295,18 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
if (tx_ring) {
struct ice_hw *hw = &pf->hw;
u32 head, val = 0;
u32 head, intr = 0;
head = FIELD_GET(QTX_COMM_HEAD_HEAD_M,
rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])));
/* Read interrupt register */
val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
vsi->vsi_num, txqueue, tx_ring->next_to_clean,
head, tx_ring->next_to_use, val);
head, tx_ring->next_to_use, intr);
ice_prep_tx_hang_report(pf, tx_ring, vsi->vsi_num, head, intr);
}
pf->tx_timeout_last_recovery = jiffies;

@ -4,7 +4,7 @@
#include "ice.h"
#include "ice_eswitch.h"
#include "devlink/devlink.h"
#include "devlink/devlink_port.h"
#include "devlink/port.h"
#include "ice_sriov.h"
#include "ice_tc_lib.h"
#include "ice_dcb_lib.h"

@ -5,8 +5,8 @@
#include "ice_txrx.h"
#include "ice_fltr.h"
#include "ice_sf_eth.h"
#include "devlink/devlink_port.h"
#include "devlink/devlink.h"
#include "devlink/port.h"
static const struct net_device_ops ice_sf_netdev_ops = {
.ndo_open = ice_open,

@ -1261,6 +1261,18 @@ enum devlink_trap_group_generic_id {
.min_burst = _min_burst, \
}
/* Put a name/value pair into @fmsg, selecting the typed *_pair_put() helper
 * from the type of @value. u16 and int values go through the u32 helper.
 * There is no default association, so a @value of any type not listed below
 * fails to compile.
 */
#define devlink_fmsg_put(fmsg, name, value) ( \
_Generic((value), \
bool : devlink_fmsg_bool_pair_put, \
u8 : devlink_fmsg_u8_pair_put, \
u16 : devlink_fmsg_u32_pair_put, \
u32 : devlink_fmsg_u32_pair_put, \
u64 : devlink_fmsg_u64_pair_put, \
int : devlink_fmsg_u32_pair_put, \
char * : devlink_fmsg_string_pair_put, \
const char * : devlink_fmsg_string_pair_put) \
(fmsg, name, (value)))
enum {
/* device supports reload operations */
DEVLINK_F_RELOAD = 1UL << 0,
@ -1994,6 +2006,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port);
size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port);
void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb);
#else

@ -1238,3 +1238,70 @@ int devlink_nl_health_reporter_test_doit(struct sk_buff *skb,
return reporter->ops->test(reporter, info->extack);
}
/**
 * devlink_fmsg_dump_skb - Dump sk_buffer structure
 * @fmsg: devlink formatted message pointer
 * @skb: pointer to skb, must not be NULL
 *
 * Dump diagnostic information about sk_buff structure, like headroom, length,
 * tailroom, MAC, etc. The data is emitted as an "skb" nest (which contains an
 * "sk" nest when the skb has a socket attached) followed by a "shinfo" nest
 * describing the shared info of the skb.
 */
void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb)
{
	struct skb_shared_info *sh = skb_shinfo(skb);
	struct sock *sk = skb->sk;
	bool has_mac, has_trans;

	has_mac = skb_mac_header_was_set(skb);
	has_trans = skb_transport_header_was_set(skb);

	devlink_fmsg_pair_nest_start(fmsg, "skb");
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_put(fmsg, "actual len", skb->len);
	devlink_fmsg_put(fmsg, "head len", skb_headlen(skb));
	devlink_fmsg_put(fmsg, "data len", skb->data_len);
	devlink_fmsg_put(fmsg, "tail len", skb_tailroom(skb));
	/* -1 stands for "header not set" in the header related pairs */
	devlink_fmsg_put(fmsg, "MAC", has_mac ? skb->mac_header : -1);
	devlink_fmsg_put(fmsg, "MAC len",
			 has_mac ? skb_mac_header_len(skb) : -1);
	devlink_fmsg_put(fmsg, "network hdr", skb->network_header);
	devlink_fmsg_put(fmsg, "network hdr len",
			 has_trans ? skb_network_header_len(skb) : -1);
	devlink_fmsg_put(fmsg, "transport hdr",
			 has_trans ? skb->transport_header : -1);
	devlink_fmsg_put(fmsg, "csum", (__force u32)skb->csum);
	devlink_fmsg_put(fmsg, "csum_ip_summed", (u8)skb->ip_summed);
	devlink_fmsg_put(fmsg, "csum_complete_sw", !!skb->csum_complete_sw);
	devlink_fmsg_put(fmsg, "csum_valid", !!skb->csum_valid);
	devlink_fmsg_put(fmsg, "csum_level", (u8)skb->csum_level);
	devlink_fmsg_put(fmsg, "sw_hash", !!skb->sw_hash);
	devlink_fmsg_put(fmsg, "l4_hash", !!skb->l4_hash);
	devlink_fmsg_put(fmsg, "proto", ntohs(skb->protocol));
	devlink_fmsg_put(fmsg, "pkt_type", (u8)skb->pkt_type);
	devlink_fmsg_put(fmsg, "iif", skb->skb_iif);

	if (sk) {
		devlink_fmsg_pair_nest_start(fmsg, "sk");
		devlink_fmsg_obj_nest_start(fmsg);
		/* family comes from sk_family; sk_type is the socket type */
		devlink_fmsg_put(fmsg, "family", sk->sk_family);
		devlink_fmsg_put(fmsg, "type", sk->sk_type);
		devlink_fmsg_put(fmsg, "proto", sk->sk_protocol);
		devlink_fmsg_obj_nest_end(fmsg);
		devlink_fmsg_pair_nest_end(fmsg);
	}

	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);

	devlink_fmsg_pair_nest_start(fmsg, "shinfo");
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_put(fmsg, "tx_flags", sh->tx_flags);
	devlink_fmsg_put(fmsg, "nr_frags", sh->nr_frags);
	devlink_fmsg_put(fmsg, "gso_size", sh->gso_size);
	devlink_fmsg_put(fmsg, "gso_type", sh->gso_type);
	devlink_fmsg_put(fmsg, "gso_segs", sh->gso_segs);
	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_dump_skb);

@ -5843,6 +5843,8 @@ sub process {
#CamelCase
if ($var !~ /^$Constant$/ &&
$var =~ /[A-Z][a-z]|[a-z][A-Z]/ &&
#Ignore C keywords
$var !~ /^_Generic$/ &&
#Ignore some autogenerated defines and enum values
$var !~ /^(?:[A-Z]+_){1,5}[A-Z]{1,3}[a-z]/ &&
#Ignore Page<foo> variants