Skip to content

Commit

Permalink
Merge branch 'mlxsw-Expose-transceiver-overheat-counter'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
mlxsw: Expose transceiver overheat counter

Amit says:

An overheated transceiver can be the root cause of various network
problems such as link flapping. Counting the number of times a
transceiver's temperature was higher than its configured threshold can
therefore help in debugging such issues.

This patch set exposes a transceiver overheat counter via ethtool. This
is achieved by configuring the Spectrum ASIC to generate events whenever
a transceiver is overheated. The temperature thresholds are queried from
the transceiver (if available) and set to the default otherwise.

Example:

...
transceiver_overheat: 2

Patch set overview:

Patches #1-#3 add required device registers
Patches #4-#5 add required infrastructure in mlxsw to configure and
count overheat events
Patches #6-#9 gradually add support for the transceiver overheat counter
Patch #10 exposes the transceiver overheat counter via ethtool
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Sep 27, 2020
2 parents 603d11c + 69f6d4e commit 7cd427e
Show file tree
Hide file tree
Showing 10 changed files with 660 additions and 5 deletions.
27 changes: 27 additions & 0 deletions drivers/net/ethernet/mellanox/mlxsw/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <trace/events/devlink.h>

#include "core.h"
#include "core_env.h"
#include "item.h"
#include "cmd.h"
#include "port.h"
Expand Down Expand Up @@ -87,6 +88,8 @@ struct mlxsw_core {
struct {
struct devlink_health_reporter *fw_fatal;
} health;
struct mlxsw_env *env;
bool is_initialized; /* Denotes if core was already initialized. */
unsigned long driver_priv[];
/* driver_priv has to be always the last item */
};
Expand Down Expand Up @@ -133,6 +136,11 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
}
EXPORT_SYMBOL(mlxsw_core_res_query_enabled);

/*
 * Report the temp_warn_enabled flag of the driver attached to @mlxsw_core.
 *
 * Returns the value of mlxsw_core->driver->temp_warn_enabled unchanged.
 */
bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core)
{
	const bool temp_warn_enabled = mlxsw_core->driver->temp_warn_enabled;

	return temp_warn_enabled;
}

bool
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
const struct mlxsw_fw_rev *req_rev)
Expand Down Expand Up @@ -1943,13 +1951,20 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_thermal_init;

err = mlxsw_env_init(mlxsw_core, &mlxsw_core->env);
if (err)
goto err_env_init;

mlxsw_core->is_initialized = true;
devlink_params_publish(devlink);

if (!reload)
devlink_reload_enable(devlink);

return 0;

err_env_init:
mlxsw_thermal_fini(mlxsw_core->thermal);
err_thermal_init:
mlxsw_hwmon_fini(mlxsw_core->hwmon);
err_hwmon_init:
Expand Down Expand Up @@ -2026,6 +2041,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
}

devlink_params_unpublish(devlink);
mlxsw_core->is_initialized = false;
mlxsw_env_fini(mlxsw_core->env);
mlxsw_thermal_fini(mlxsw_core->thermal);
mlxsw_hwmon_fini(mlxsw_core->hwmon);
if (mlxsw_core->driver->fini)
Expand Down Expand Up @@ -2829,6 +2846,16 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
}
EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);

/*
 * Accessor for the core's env pointer.
 *
 * Returns whatever is currently stored in mlxsw_core->env; no validity
 * check is performed here.
 */
struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core)
{
	struct mlxsw_env *env = mlxsw_core->env;

	return env;
}

/*
 * Report whether @mlxsw_core has completed initialization.
 *
 * Returns the is_initialized flag, which the bus device register path sets
 * to true once mlxsw_env_init() has succeeded and the unregister path
 * clears before calling mlxsw_env_fini().
 */
bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core)
{
	const bool initialized = mlxsw_core->is_initialized;

	return initialized;
}

int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module)
{
enum mlxsw_reg_pmtm_module_type module_type;
Expand Down
5 changes: 5 additions & 0 deletions drivers/net/ethernet/mellanox/mlxsw/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);

bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);

bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core);

bool
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
const struct mlxsw_fw_rev *req_rev);
Expand Down Expand Up @@ -221,6 +223,8 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
struct devlink_port *
mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
u8 local_port);
struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core);
bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core);
int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module);

int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
Expand Down Expand Up @@ -371,6 +375,7 @@ struct mlxsw_driver {
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
bool fw_fatal_enabled;
bool temp_warn_enabled;
};

int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
Expand Down
Loading

0 comments on commit 7cd427e

Please sign in to comment.