Merge branch 'Devlink-health-updates'

Eran Ben Elisha says:

====================
Devlink health updates

This patchset includes a fix [patch 01] to the devlink health state update
for the case where recovery was aborted.

In addition, it includes a small enhancement to the infrastructure that
allows a direct state update at run time, and uses it from the mlx5e tx reporter.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-03-04 11:00:43 -08:00
commit d6089c7418
4 changed files with 70 additions and 6 deletions

View File

@ -2302,6 +2302,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
goto err_close_channels;
}
if (!IS_ERR_OR_NULL(priv->tx_reporter))
devlink_health_reporter_state_update(priv->tx_reporter,
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
kvfree(cparam);
return 0;

View File

@ -447,6 +447,11 @@ typedef void devlink_snapshot_data_dest_t(const void *data);
struct devlink_fmsg;
struct devlink_health_reporter;
/**
 * enum devlink_health_reporter_state - health state kept per health reporter
 * @DEVLINK_HEALTH_REPORTER_STATE_HEALTHY: reporter is in the healthy state
 * @DEVLINK_HEALTH_REPORTER_STATE_ERROR: reporter is in the error state;
 *	devlink_health_report() sets this when an error is reported
 *
 * No explicit values are assigned, so HEALTHY is 0 and ERROR is 1.
 */
enum devlink_health_reporter_state {
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
DEVLINK_HEALTH_REPORTER_STATE_ERROR,
};
/**
* struct devlink_health_reporter_ops - Reporter operations
* @name: reporter name
@ -715,6 +720,9 @@ void *
devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
int devlink_health_report(struct devlink_health_reporter *reporter,
const char *msg, void *priv_ctx);
/* Directly set the health state of @reporter to @state. */
void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state);
void devlink_compat_running_version(struct net_device *dev,
char *buf, size_t len);
@ -1204,6 +1212,12 @@ devlink_health_report(struct devlink_health_reporter *reporter,
return 0;
}
/* Empty stub: this variant of the state update does nothing.
 * NOTE(review): presumably the fallback used when devlink support is
 * compiled out - confirm against the enclosing preprocessor conditional.
 */
static inline void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state)
{
}
static inline void
devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
{

View File

@ -140,6 +140,37 @@ TRACE_EVENT(devlink_health_recover_aborted,
__entry->time_since_last_recover)
);
/*
 * Tracepoint for devlink health reporter state update:
 * records the bus, device, driver and reporter names together with the
 * new state value.
 *
 * NOTE(review): new_state is declared bool in TP_PROTO but is stored in a
 * u8 field and printed with %d, while the caller visible in this change
 * passes an enum devlink_health_reporter_state. This is lossless only
 * while that enum has just the values 0 and 1 - revisit if more states
 * are ever added.
 *
 * NOTE(review): devlink->dev->bus and devlink->dev->driver are
 * dereferenced without a NULL check; this assumes the device is bound to
 * a bus and a driver whenever the tracepoint fires - confirm.
 */
TRACE_EVENT(devlink_health_reporter_state_update,
TP_PROTO(const struct devlink *devlink, const char *reporter_name,
bool new_state),
TP_ARGS(devlink, reporter_name, new_state),
TP_STRUCT__entry(
__string(bus_name, devlink->dev->bus->name)
__string(dev_name, dev_name(devlink->dev))
__string(driver_name, devlink->dev->driver->name)
__string(reporter_name, reporter_name)
__field(u8, new_state)
),
TP_fast_assign(
__assign_str(bus_name, devlink->dev->bus->name);
__assign_str(dev_name, dev_name(devlink->dev));
__assign_str(driver_name, devlink->dev->driver->name);
__assign_str(reporter_name, reporter_name);
__entry->new_state = new_state;
),
TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: new_state=%d",
__get_str(bus_name), __get_str(dev_name),
__get_str(driver_name), __get_str(reporter_name),
__entry->new_state)
);
#endif /* _TRACE_DEVLINK_H */
/* This part must be outside protection */

View File

@ -4409,11 +4409,6 @@ struct devlink_health_reporter {
u64 last_recovery_ts;
};
/**
 * enum devlink_health_reporter_state - health state kept per health reporter
 * @DEVLINK_HEALTH_REPORTER_STATE_HEALTHY: reporter is in the healthy state
 * @DEVLINK_HEALTH_REPORTER_STATE_ERROR: reporter is in the error state;
 *	devlink_health_report() sets this when an error is reported
 *
 * No explicit values are assigned, so HEALTHY is 0 and ERROR is 1.
 */
enum devlink_health_reporter_state {
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
DEVLINK_HEALTH_REPORTER_STATE_ERROR,
};
void *
devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
{
@ -4498,6 +4493,23 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state)
{
if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
return;
if (reporter->health_state == state)
return;
reporter->health_state = state;
trace_devlink_health_reporter_state_update(reporter->devlink,
reporter->ops->name, state);
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
void *priv_ctx)
@ -4569,16 +4581,19 @@ dump_err:
int devlink_health_report(struct devlink_health_reporter *reporter,
const char *msg, void *priv_ctx)
{
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
/* write a log message of the current error */
WARN_ON(!msg);
trace_devlink_health_report(devlink, reporter->ops->name, msg);
reporter->error_count++;
prev_health_state = reporter->health_state;
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
/* abort if the previous error wasn't recovered */
if (reporter->auto_recover &&
(reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
(prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
jiffies - reporter->last_recovery_ts <
msecs_to_jiffies(reporter->graceful_period))) {
trace_devlink_health_recover_aborted(devlink,