From 9b258baec1e2f8f196d4b42857044887fba3a86b Mon Sep 17 00:00:00 2001
From: Stepan Blyschak <stepanb@nvidia.com>
Date: Mon, 20 Jun 2022 15:02:27 +0300
Subject: [PATCH] [patch] mlxsw: i2c: Prevent transaction execution for special
 chip states

Do not run transaction in cases chip is in reset or in-filed update states.

This patch fixes an issue encountered during SONiC warm-reboot: during
ISSU start the kernel reports that CPU stall is detected:

```
INFO: rcu_sched detected stalls on CPUs/tasks
```

This patch won't be upstreamed due to the upstream driver not supporting
ISSU.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
---
 ...2c-Prevent-transaction-execution-for.patch | 126 ++++++++++++++++++
 patch/series                                  |   1 +
 2 files changed, 127 insertions(+)
 create mode 100644 patch/0065-TMP-mlxsw-i2c-Prevent-transaction-execution-for.patch

diff --git a/patch/0065-TMP-mlxsw-i2c-Prevent-transaction-execution-for.patch b/patch/0065-TMP-mlxsw-i2c-Prevent-transaction-execution-for.patch
new file mode 100644
index 000000000..16b1eab20
--- /dev/null
+++ b/patch/0065-TMP-mlxsw-i2c-Prevent-transaction-execution-for.patch
@@ -0,0 +1,126 @@
+From d3e0bf403d527f717a62ea328781256f999d4f95 Mon Sep 17 00:00:00 2001
+From: Stepan Blyschak <stepanb@nvidia.com>
+Date: Mon, 20 Jun 2022 19:28:04 +0300
+Subject: [PATCH] TMP: mlxsw: i2c: Prevent transaction execution for special
+ chip states
+
+Do not run transaction in cases chip is in reset or in-service update
+states. In such case firmware is not accessible and will reject transaction
+with the relevant status "RUNNING_RESET" or "FW_ISSU_ONGOING".
+In case transaction is failed do to one of these reasons, stop sending
+transactions. In such case driver is about to be removed since it
+cannot continue running after reset or in-service update. And
+re-probed again after reset or in-service update is completed.
+
+Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
+Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
+---
+ drivers/net/ethernet/mellanox/mlxsw/cmd.h |  4 ++++
+ drivers/net/ethernet/mellanox/mlxsw/i2c.c | 29 ++++++++++++++++++++---
+ 2 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+index 91f68fb0b..d8ecb8c8a 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+@@ -149,6 +149,8 @@ enum mlxsw_cmd_status {
+ 	MLXSW_CMD_STATUS_BAD_NVMEM	= 0x0B,
+ 	/* Device is currently running reset */
+ 	MLXSW_CMD_STATUS_RUNNING_RESET	= 0x26,
++	/* FW ISSU ongoing. */
++	MLXSW_CMD_STATUS_FW_ISSU	= 0x27,
+ 	/* Bad management packet (silently discarded). */
+ 	MLXSW_CMD_STATUS_BAD_PKT	= 0x30,
+ };
+@@ -180,6 +182,8 @@ static inline const char *mlxsw_cmd_status_str(u8 status)
+ 		return "BAD_NVMEM";
+ 	case MLXSW_CMD_STATUS_RUNNING_RESET:
+ 		return "RUNNING_RESET";
++	case MLXSW_CMD_STATUS_FW_ISSU:
++		return "FW_ISSU_ONGOING";
+ 	case MLXSW_CMD_STATUS_BAD_PKT:
+ 		return "BAD_PKT";
+ 	default:
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+index 939b692ff..3c9d2c7a0 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+@@ -63,6 +63,7 @@
+  * @core: switch core pointer;
+  * @bus_info: bus info block;
+  * @block_size: maximum block size allowed to pass to under layer;
++ * @status: status to indicate chip reset or in-service update;
+  */
+ struct mlxsw_i2c {
+ 	struct {
+@@ -76,6 +77,7 @@ struct mlxsw_i2c {
+ 	struct mlxsw_core *core;
+ 	struct mlxsw_bus_info bus_info;
+ 	u16 block_size;
++	u8 status;
+ };
+ 
+ #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {	\
+@@ -222,6 +224,19 @@ static int mlxsw_i2c_write_cmd(struct i2c_client *client,
+ 	return 0;
+ }
+ 
++static bool
++mlxsw_i2c_cmd_status_verify(struct device *dev, struct mlxsw_i2c *mlxsw_i2c,
++			    u8 status)
++{
++	if (status == MLXSW_CMD_STATUS_FW_ISSU ||
++	    status == MLXSW_CMD_STATUS_RUNNING_RESET) {
++		mlxsw_i2c->status = status;
++		dev_info(dev, "FW status=%x(%s)): Access to device is not allowed in this state\n", status, mlxsw_cmd_status_str(status));
++		return true;
++	}
++	return false;
++}
++
+ /* Routine posts initialization command to ASIC through mail box. */
+ static int
+ mlxsw_i2c_write_init_cmd(struct i2c_client *client,
+@@ -405,6 +420,10 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
+ 
+ 	WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
+ 
++	/* Do not run transaction if chip is in reset or in-service update state. */
++	if (mlxsw_i2c->status)
++		return 0;
++
+ 	if (in_mbox) {
+ 		reg_size = mlxsw_i2c_get_reg_size(in_mbox);
+ 		num = reg_size / mlxsw_i2c->block_size;
+@@ -479,6 +498,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
+ 
+ cmd_fail:
+ 	mutex_unlock(&mlxsw_i2c->cmd.lock);
++	if (mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, *status))
++		err = 0;
+ 	return err;
+ }
+ 
+@@ -608,14 +629,16 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
+ 	/* Wait until go bit is cleared. */
+ 	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
+ 	if (err) {
+-		dev_err(&client->dev, "HW semaphore is not released");
++		if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
++			dev_err(&client->dev, "HW semaphore is not released");
+ 		goto errout;
+ 	}
+ 
+ 	/* Validate transaction completion status. */
+ 	if (status) {
+-		dev_err(&client->dev, "Bad transaction completion status %x\n",
+-			status);
++		if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
++			dev_err(&client->dev, "Bad transaction completion status %x\n",
++				status);
+ 		err = -EIO;
+ 		goto errout;
+ 	}
+-- 
+2.17.1
+
diff --git a/patch/series b/patch/series
index a7b25f41e..a3b2c7c34 100755
--- a/patch/series
+++ b/patch/series
@@ -84,6 +84,7 @@ net-sch_generic-fix-the-missing-new-qdisc-assignment.patch
 0031-backport-nvme-Add-hardware-monitoring-support.patch
 0032-platform-mellanox-mlxreg-hotplug-Use-capability-regi.patch
 0064-platform-mellanox-mlxreg-io-Fix-read-access-of-n-byt.patch
+0065-TMP-mlxsw-i2c-Prevent-transaction-execution-for.patch
 0144-platform-x86-mlx-platform-Fix-cpld-_pn-register-seco.patch
 
 # Cisco patches for 4.19 kernel