From aff5d9d378e77a45f33cd933b8b8ddac8fe43528 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 28 Dec 2021 13:47:18 +0200
Subject: [PATCH 01/50] habanalabs: check the return value of
 hl_cs_poll_fences()

As part of handling of the multi-CS wait ioctl, hl_cs_poll_fences() is
called in a "while (true)" loop. This function can fail, but its
return value was not checked.
Add this check and exit the loop in case of a failure.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 0a4ef13d9ac4..2f40b937c59f 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2739,7 +2739,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		mcs_data.update_ts = false;
 		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
 
-		if (mcs_data.completion_bitmap)
+		if (rc || mcs_data.completion_bitmap)
 			break;
 
 		/*

From eb85eec858c1a5c11d3a0bff403f6440b05b40dc Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Mon, 3 Jan 2022 09:48:27 +0200
Subject: [PATCH 02/50] habanalabs: fix possible memory leak in MMU DR fini

This patch fixes what seems to be a copy-paste error.

We will have a memory leak if the host-resident shadow is NULL (which
will likely happen as the DR and HR are not dependent).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/mmu/mmu_v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 6134b6ae7615..3cadef97817d 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -467,7 +467,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 {
 	/* MMU H/W fini was already done in device hw_fini() */
 
-	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
+	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
 		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
 		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
 

From 4edb4ffe39c9bdaec50186d0ca583a7ff01143de Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 5 Jan 2022 23:14:45 +0200
Subject: [PATCH 03/50] habanalabs/gaudi: disable CGM permanently

Due to the need of SynapseAI to configure all TPC engines from a single
QMAN, the driver must disable CGM and never allow the user to enable
it. Otherwise, the configuration of the TPC engines will fail.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     |  19 +-
 drivers/misc/habanalabs/common/debugfs.c      |  32 +--
 drivers/misc/habanalabs/common/habanalabs.h   |   4 -
 .../misc/habanalabs/common/habanalabs_drv.c   |   1 -
 drivers/misc/habanalabs/gaudi/gaudi.c         | 230 +++++-------------
 drivers/misc/habanalabs/gaudi/gaudiP.h        |   1 -
 6 files changed, 66 insertions(+), 221 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 2667cbf940f3..783001a574b3 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -12,24 +12,7 @@ What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
 Date:           May 2020
 KernelVersion:  5.8
 Contact:        ogabbay@kernel.org
-Description:    Allow the root user to disable/enable in runtime the clock
-                gating mechanism in Gaudi. Due to how Gaudi is built, the
-                clock gating needs to be disabled in order to access the
-                registers of the TPC and MME engines. This is sometimes needed
-                during debug by the user and hence the user needs this option.
-                The user can supply a bitmask value, each bit represents
-                a different engine to disable/enable its clock gating feature.
-                The bitmask is composed of 20 bits:
-
-                =======   ============
-                0  -  7   DMA channels
-                8  - 11   MME engines
-                12 - 19   TPC engines
-                =======   ============
-
-                The bit's location of a specific engine can be determined
-                using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
-                are defined in uapi habanalabs.h file in enum gaudi_engine_id
+Description:    This setting is now deprecated as clock gating is handled solely by the f/w
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
 Date:           Jan 2019
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index fc084ee5106e..e3ee5f45d20c 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1054,42 +1054,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
 static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
 					size_t count, loff_t *ppos)
 {
-	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-	struct hl_device *hdev = entry->hdev;
-	char tmp_buf[200];
-	ssize_t rc;
-
-	if (*ppos)
-		return 0;
-
-	sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
-	rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
-			strlen(tmp_buf) + 1);
-
-	return rc;
+	return 0;
 }
 
 static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
 				     size_t count, loff_t *ppos)
 {
-	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-	struct hl_device *hdev = entry->hdev;
-	u64 value;
-	ssize_t rc;
-
-	if (hdev->reset_info.in_reset) {
-		dev_warn_ratelimited(hdev->dev,
-				"Can't change clock gating during reset\n");
-		return 0;
-	}
-
-	rc = kstrtoull_from_user(buf, count, 16, &value);
-	if (rc)
-		return rc;
-
-	hdev->clock_gating_mask = value;
-	hdev->asic_funcs->set_clock_gating(hdev);
-
 	return count;
 }
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index cb710fd478b6..27b9f24c4f75 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2569,9 +2569,6 @@ struct hl_reset_info {
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, the driver will restore
  *             this value and update the F/W after the re-initialization
- * @clock_gating_mask: is clock gating enabled. bitmask that represents the
- *                     different engines. See debugfs-driver-habanalabs for
- *                     details.
  * @boot_error_status_mask: contains a mask of the device boot error status.
  *                          Each bit represents a different error, according to
  *                          the defines in hl_boot_if.h. If the bit is cleared,
@@ -2710,7 +2707,6 @@ struct hl_device {
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
-	u64				clock_gating_mask;
 	u64				boot_error_status_mask;
 	u64				dram_pci_bar_start;
 	u64				last_successful_open_jif;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 690b763c7a95..017bfc4551da 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -256,7 +256,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->cpu_queues_enable = 1;
 	hdev->heartbeat = 1;
 	hdev->mmu_enable = 1;
-	hdev->clock_gating_mask = ULONG_MAX;
 	hdev->sram_scrambler_enable = 1;
 	hdev->dram_scrambler_enable = 1;
 	hdev->bmc_enable = 1;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 013c6da2e3ca..c20b781d4ada 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3740,74 +3740,10 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
 
 static void gaudi_set_clock_gating(struct hl_device *hdev)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
-	u32 qman_offset;
-	bool enable;
-	int i;
-
-	/* In case we are during debug session, don't enable the clock gate
-	 * as it may interfere
-	 */
-	if (hdev->in_debug)
-		return;
-
-	if (hdev->asic_prop.fw_security_enabled)
-		return;
-
-	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
-		enable = !!(hdev->clock_gating_mask &
-				(BIT_ULL(gaudi_dma_assignment[i])));
-
-		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
-		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
-				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
-				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
-	}
-
-	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
-		enable = !!(hdev->clock_gating_mask &
-				(BIT_ULL(gaudi_dma_assignment[i])));
-
-		/* GC sends work to DMA engine through Upper CP in DMA5 so
-		 * we need to not enable clock gating in that DMA
-		 */
-		if (i == GAUDI_HBM_DMA_4)
-			enable = 0;
-
-		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
-		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
-				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
-				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
-	}
-
-	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
-	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
-
-	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
-	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
-
-	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
-		enable = !!(hdev->clock_gating_mask &
-				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
-
-		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
-				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
-				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
-
-		qman_offset += TPC_QMAN_OFFSET;
-	}
-
-	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
 }
 
 static void gaudi_disable_clock_gating(struct hl_device *hdev)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 qman_offset;
 	int i;
 
@@ -3832,8 +3768,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
 
 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
 	}
-
-	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
 }
 
 static void gaudi_enable_timestamp(struct hl_device *hdev)
@@ -4400,14 +4334,11 @@ skip_reset:
 			status);
 
 	if (gaudi) {
-		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
-				HW_CAP_HBM | HW_CAP_PCI_DMA |
-				HW_CAP_MME | HW_CAP_TPC_MASK |
-				HW_CAP_HBM_DMA | HW_CAP_PLL |
-				HW_CAP_NIC_MASK | HW_CAP_MMU |
-				HW_CAP_SRAM_SCRAMBLER |
-				HW_CAP_HBM_SCRAMBLER |
-				HW_CAP_CLK_GATE);
+		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
+						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
+						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
+						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
+						HW_CAP_HBM_SCRAMBLER);
 
 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
 
@@ -6188,7 +6119,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
 			bool user_address, u32 *val)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u64 hbm_bar_addr, host_phys_end;
 	int rc = 0;
 
@@ -6196,38 +6126,31 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
 
 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
 
-		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
-				(hdev->clock_gating_mask &
-						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+		*val = RREG32(addr - CFG_BASE);
 
-			dev_err_ratelimited(hdev->dev,
-				"Can't read register - clock gating is enabled!\n");
-			rc = -EFAULT;
-		} else {
-			*val = RREG32(addr - CFG_BASE);
-		}
+	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
+
+		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
-	} else if ((addr >= SRAM_BASE_ADDR) &&
-			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
-				(addr - SRAM_BASE_ADDR));
 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-		u64 bar_base_addr = DRAM_PHYS_BASE +
-				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+		u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
 
 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-		if (hbm_bar_addr != U64_MAX) {
-			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
-						(addr - bar_base_addr));
 
-			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
-						hbm_bar_addr);
+		if (hbm_bar_addr != U64_MAX) {
+			*val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
+			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
 		}
+
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
+
 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
 			user_address && !iommu_present(&pci_bus_type)) {
+
 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
 	} else {
 		rc = -EFAULT;
 	}
@@ -6239,7 +6162,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
 			bool user_address, u32 val)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u64 hbm_bar_addr, host_phys_end;
 	int rc = 0;
 
@@ -6247,38 +6169,31 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
 
 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
 
-		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
-				(hdev->clock_gating_mask &
-						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+		WREG32(addr - CFG_BASE, val);
 
-			dev_err_ratelimited(hdev->dev,
-				"Can't write register - clock gating is enabled!\n");
-			rc = -EFAULT;
-		} else {
-			WREG32(addr - CFG_BASE, val);
-		}
+	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
+
+		writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
-	} else if ((addr >= SRAM_BASE_ADDR) &&
-			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
-					(addr - SRAM_BASE_ADDR));
 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-		u64 bar_base_addr = DRAM_PHYS_BASE +
-				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+		u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
 
 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-		if (hbm_bar_addr != U64_MAX) {
-			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
-						(addr - bar_base_addr));
 
-			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
-						hbm_bar_addr);
+		if (hbm_bar_addr != U64_MAX) {
+			writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
+			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
 		}
+
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
+
 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
 			user_address && !iommu_present(&pci_bus_type)) {
+
 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
 	} else {
 		rc = -EFAULT;
 	}
@@ -6290,7 +6205,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
 				bool user_address, u64 *val)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u64 hbm_bar_addr, host_phys_end;
 	int rc = 0;
 
@@ -6298,42 +6212,35 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
 
 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
 
-		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
-				(hdev->clock_gating_mask &
-						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+		u32 val_l = RREG32(addr - CFG_BASE);
+		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
 
-			dev_err_ratelimited(hdev->dev,
-				"Can't read register - clock gating is enabled!\n");
-			rc = -EFAULT;
-		} else {
-			u32 val_l = RREG32(addr - CFG_BASE);
-			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
-
-			*val = (((u64) val_h) << 32) | val_l;
-		}
+		*val = (((u64) val_h) << 32) | val_l;
 
 	} else if ((addr >= SRAM_BASE_ADDR) &&
-		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
-				(addr - SRAM_BASE_ADDR));
-	} else if (addr <=
-		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-		u64 bar_base_addr = DRAM_PHYS_BASE +
-				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
+
+		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
+
+	} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
+
+		u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
 
 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-		if (hbm_bar_addr != U64_MAX) {
-			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
-						(addr - bar_base_addr));
 
-			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
-						hbm_bar_addr);
+		if (hbm_bar_addr != U64_MAX) {
+			*val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
+			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
 		}
+
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
+
 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
 			user_address && !iommu_present(&pci_bus_type)) {
+
 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
 	} else {
 		rc = -EFAULT;
 	}
@@ -6345,7 +6252,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
 				bool user_address, u64 val)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u64 hbm_bar_addr, host_phys_end;
 	int rc = 0;
 
@@ -6353,41 +6259,33 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
 
 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
 
-		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
-				(hdev->clock_gating_mask &
-						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
-
-			dev_err_ratelimited(hdev->dev,
-				"Can't write register - clock gating is enabled!\n");
-			rc = -EFAULT;
-		} else {
-			WREG32(addr - CFG_BASE, lower_32_bits(val));
-			WREG32(addr + sizeof(u32) - CFG_BASE,
-				upper_32_bits(val));
-		}
+		WREG32(addr - CFG_BASE, lower_32_bits(val));
+		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
 
 	} else if ((addr >= SRAM_BASE_ADDR) &&
-		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
-					(addr - SRAM_BASE_ADDR));
-	} else if (addr <=
-		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-		u64 bar_base_addr = DRAM_PHYS_BASE +
-				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
+
+		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
+
+	} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
+
+		u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
 
 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-		if (hbm_bar_addr != U64_MAX) {
-			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
-						(addr - bar_base_addr));
 
-			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
-						hbm_bar_addr);
+		if (hbm_bar_addr != U64_MAX) {
+			writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
+			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
 		}
+
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
+
 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
 			user_address && !iommu_present(&pci_bus_type)) {
+
 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
 	} else {
 		rc = -EFAULT;
 	}
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 8ac16a9b7d15..caa858bf6edc 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -177,7 +177,6 @@
 #define HW_CAP_MSI		BIT(6)
 #define HW_CAP_CPU_Q		BIT(7)
 #define HW_CAP_HBM_DMA		BIT(8)
-#define HW_CAP_CLK_GATE		BIT(9)
 #define HW_CAP_SRAM_SCRAMBLER	BIT(10)
 #define HW_CAP_HBM_SCRAMBLER	BIT(11)
 

From d280d5954ec37e036c13fb64cf7ace8ce4c5fe15 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 6 Jan 2022 15:02:44 +0200
Subject: [PATCH 04/50] habanalabs: remove ASIC functions of clock gating

Now that clock gating is permanently disabled in GAUDI, there is no
need for the ASIC functions that set and disable clock gating, as this
was a scenario unique to GAUDI.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     |   4 -
 drivers/misc/habanalabs/common/habanalabs.h |   5 -
 drivers/misc/habanalabs/gaudi/gaudi.c       | 123 +-------------------
 drivers/misc/habanalabs/gaudi/gaudiP.h      |   3 -
 drivers/misc/habanalabs/goya/goya.c         |  12 --
 5 files changed, 5 insertions(+), 142 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 733338ab6f1d..9955e0d3502b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -644,9 +644,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
 
 		hdev->in_debug = 0;
 
-		if (!hdev->reset_info.hard_reset_pending)
-			hdev->asic_funcs->set_clock_gating(hdev);
-
 		goto out;
 	}
 
@@ -657,7 +654,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
 		goto out;
 	}
 
-	hdev->asic_funcs->disable_clock_gating(hdev);
 	hdev->in_debug = 1;
 
 out:
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 27b9f24c4f75..24962807e2cf 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1164,9 +1164,6 @@ struct fw_load_mgr {
  * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
  *                              ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
- * @set_clock_gating: enable/disable clock gating per engine according to
- *                    clock gating mask in hdev
- * @disable_clock_gating: disable clock gating completely
  * @debug_coresight: perform certain actions on Coresight for debugging.
  * @is_device_idle: return true if device is idle, false otherwise.
  * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
@@ -1300,8 +1297,6 @@ struct hl_asic_funcs {
 	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
 				u32 flags, u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
-	void (*set_clock_gating)(struct hl_device *hdev);
-	void (*disable_clock_gating)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
 	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
 					u8 mask_len, struct seq_file *s);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index c20b781d4ada..16637d629e67 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -458,7 +458,6 @@ struct ecc_info_extract_params {
 	u64 block_address;
 	u32 num_memories;
 	bool derr;
-	bool disable_clock_gating;
 };
 
 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
@@ -1896,7 +1895,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
 		goto free_cpu_accessible_dma_pool;
 
 	spin_lock_init(&gaudi->hw_queues_lock);
-	mutex_init(&gaudi->clk_gate_mutex);
 
 	hdev->supports_sync_stream = true;
 	hdev->supports_coresight = true;
@@ -1946,8 +1944,6 @@ static int gaudi_sw_fini(struct hl_device *hdev)
 
 	dma_pool_destroy(hdev->dma_pool);
 
-	mutex_destroy(&gaudi->clk_gate_mutex);
-
 	kfree(gaudi);
 
 	return 0;
@@ -3738,10 +3734,6 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
 }
 
-static void gaudi_set_clock_gating(struct hl_device *hdev)
-{
-}
-
 static void gaudi_disable_clock_gating(struct hl_device *hdev)
 {
 	u32 qman_offset;
@@ -3810,8 +3802,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_
 	gaudi_stop_hbm_dma_qmans(hdev);
 	gaudi_stop_pci_dma_qmans(hdev);
 
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	msleep(wait_timeout_ms);
 
 	gaudi_pci_dma_stall(hdev);
@@ -4137,10 +4127,8 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 	/* In case the clock gating was enabled in preboot we need to disable
 	 * it here before touching the MME/TPC registers.
-	 * There is no need to take clk gating mutex because when this function
-	 * runs, no other relevant code can run
 	 */
-	hdev->asic_funcs->disable_clock_gating(hdev);
+	gaudi_disable_clock_gating(hdev);
 
 	/* SRAM scrambler must be initialized after CPU is running from HBM */
 	gaudi_init_scrambler_sram(hdev);
@@ -4166,8 +4154,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 	gaudi_init_nic_qmans(hdev);
 
-	hdev->asic_funcs->set_clock_gating(hdev);
-
 	gaudi_enable_timestamp(hdev);
 
 	/* MSI must be enabled before CPU queues and NIC are initialized */
@@ -4815,7 +4801,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc = 0;
 	u64 val = 0;
 
@@ -4850,17 +4835,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
 			return rc;
 		}
 
-		mutex_lock(&gaudi->clk_gate_mutex);
-		hdev->asic_funcs->disable_clock_gating(hdev);
-
 		/* Scrub HBM using all DMA channels in parallel */
 		rc = gaudi_hbm_scrubbing(hdev);
 		if (rc)
 			dev_err(hdev->dev,
 				"Failed to clear HBM in mem scrub all\n");
-
-		hdev->asic_funcs->set_clock_gating(hdev);
-		mutex_unlock(&gaudi->clk_gate_mutex);
 	}
 
 	return rc;
@@ -6344,7 +6323,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 				void *blob_addr)
 {
 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 qm_glbl_sts0, qm_cgm_sts;
 	u64 dma_offset, qm_offset;
 	dma_addr_t dma_addr;
@@ -6360,10 +6338,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 	if (!kernel_addr)
 		return -ENOMEM;
 
-	mutex_lock(&gaudi->clk_gate_mutex);
-
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
@@ -6448,10 +6422,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 out:
 	hdev->asic_funcs->hw_queues_unlock(hdev);
 
-	hdev->asic_funcs->set_clock_gating(hdev);
-
-	mutex_unlock(&gaudi->clk_gate_mutex);
-
 	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
 						dma_addr);
 
@@ -6499,10 +6469,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
 		return;
 	}
 
-	mutex_lock(&gaudi->clk_gate_mutex);
-
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
@@ -6780,10 +6746,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
 
 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
-
-	hdev->asic_funcs->set_clock_gating(hdev);
-
-	mutex_unlock(&gaudi->clk_gate_mutex);
 }
 
 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
@@ -7164,10 +7126,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
 		struct ecc_info_extract_params *params, u64 *ecc_address,
 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 i, num_mem_regs, reg, err_bit;
 	u64 err_addr, err_word = 0;
-	int rc = 0;
 
 	num_mem_regs = params->num_memories / 32 +
 			((params->num_memories % 32) ? 1 : 0);
@@ -7180,11 +7140,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
 	else
 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
 
-	if (params->disable_clock_gating) {
-		mutex_lock(&gaudi->clk_gate_mutex);
-		hdev->asic_funcs->disable_clock_gating(hdev);
-	}
-
 	/* Set invalid wrapper index */
 	*memory_wrapper_idx = 0xFF;
 
@@ -7201,8 +7156,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
 
 	if (*memory_wrapper_idx == 0xFF) {
 		dev_err(hdev->dev, "ECC error information cannot be found\n");
-		rc = -EINVAL;
-		goto enable_clk_gate;
+		return -EINVAL;
 	}
 
 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
@@ -7222,14 +7176,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
 
 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
 
-enable_clk_gate:
-	if (params->disable_clock_gating) {
-		hdev->asic_funcs->set_clock_gating(hdev);
-
-		mutex_unlock(&gaudi->clk_gate_mutex);
-	}
-
-	return rc;
+	return 0;
 }
 
 /*
@@ -7487,7 +7434,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
 		params.num_memories = 90;
 		params.derr = false;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
@@ -7496,7 +7442,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
 		params.num_memories = 90;
 		params.derr = true;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	case GAUDI_EVENT_MME0_ACC_SERR:
@@ -7507,7 +7452,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
 		params.num_memories = 128;
 		params.derr = false;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	case GAUDI_EVENT_MME0_ACC_DERR:
@@ -7518,7 +7462,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
 		params.num_memories = 128;
 		params.derr = true;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	case GAUDI_EVENT_MME0_SBAB_SERR:
@@ -7530,7 +7473,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
 		params.num_memories = 33;
 		params.derr = false;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	case GAUDI_EVENT_MME0_SBAB_DERR:
@@ -7542,7 +7484,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
 		params.num_memories = 33;
 		params.derr = true;
-		params.disable_clock_gating = true;
 		extract_info_from_fw = false;
 		break;
 	default:
@@ -7864,19 +7805,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
 					char *interrupt_name)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
 	bool soft_reset_required = false;
 
-	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
-	 * gating, and thus cannot be done in CPU-CP and should be done instead
-	 * by the driver.
-	 */
-
-	mutex_lock(&gaudi->clk_gate_mutex);
-
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
 
@@ -7894,10 +7825,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
 	/* Clear interrupts */
 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
 
-	hdev->asic_funcs->set_clock_gating(hdev);
-
-	mutex_unlock(&gaudi->clk_gate_mutex);
-
 	return soft_reset_required;
 }
 
@@ -8359,10 +8286,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	u64 offset;
 	int i, dma_id, port;
 
-	mutex_lock(&gaudi->clk_gate_mutex);
-
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	if (s)
 		seq_puts(s,
 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
@@ -8483,10 +8406,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	if (s)
 		seq_puts(s, "\n");
 
-	hdev->asic_funcs->set_clock_gating(hdev);
-
-	mutex_unlock(&gaudi->clk_gate_mutex);
-
 	return is_idle;
 }
 
@@ -8526,10 +8445,8 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
  * this function should be used only during initialization and/or after reset,
  * when there are no active users.
  */
-static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
-				u32 tpc_id)
+static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u64 kernel_timeout;
 	u32 status, offset;
 	int rc;
@@ -8541,10 +8458,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 	else
 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	mutex_lock(&gaudi->clk_gate_mutex);
-
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
 			lower_32_bits(tpc_kernel));
 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
@@ -8584,8 +8497,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 		dev_err(hdev->dev,
 			"Timeout while waiting for TPC%d icache prefetch\n",
 			tpc_id);
-		hdev->asic_funcs->set_clock_gating(hdev);
-		mutex_unlock(&gaudi->clk_gate_mutex);
 		return -EIO;
 	}
 
@@ -8609,8 +8520,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 		dev_err(hdev->dev,
 			"Timeout while waiting for TPC%d vector pipe\n",
 			tpc_id);
-		hdev->asic_funcs->set_clock_gating(hdev);
-		mutex_unlock(&gaudi->clk_gate_mutex);
 		return -EIO;
 	}
 
@@ -8622,9 +8531,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 		1000,
 		kernel_timeout);
 
-	hdev->asic_funcs->set_clock_gating(hdev);
-	mutex_unlock(&gaudi->clk_gate_mutex);
-
 	if (rc) {
 		dev_err(hdev->dev,
 			"Timeout while waiting for TPC%d kernel to execute\n",
@@ -9191,23 +9097,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
 				struct hl_sync_to_engine_map *map)
 {
 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	int i, j, rc;
 	u32 reg_value;
 
 	/* Iterate over TPC engines */
 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
-		/* TPC registered must be accessed with clock gating disabled */
-		mutex_lock(&gaudi->clk_gate_mutex);
-		hdev->asic_funcs->disable_clock_gating(hdev);
 
 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
 					sds->props[SP_NEXT_TPC] * i);
 
-		/* We can reenable clock_gating */
-		hdev->asic_funcs->set_clock_gating(hdev);
-		mutex_unlock(&gaudi->clk_gate_mutex);
-
 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
 							ENGINE_TPC, i);
 		if (rc)
@@ -9217,20 +9115,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
 	/* Iterate over MME engines */
 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
-			/* MME registered must be accessed with clock gating
-			 * disabled
-			 */
-			mutex_lock(&gaudi->clk_gate_mutex);
-			hdev->asic_funcs->disable_clock_gating(hdev);
 
 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
 						sds->props[SP_NEXT_MME] * i +
 						j * sizeof(u32));
 
-			/* We can reenable clock_gating */
-			hdev->asic_funcs->set_clock_gating(hdev);
-			mutex_unlock(&gaudi->clk_gate_mutex);
-
 			rc = gaudi_add_sync_to_engine_map_entry(
 				map, reg_value, ENGINE_MME,
 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
@@ -9481,8 +9370,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
 	.send_heartbeat = gaudi_send_heartbeat,
-	.set_clock_gating = gaudi_set_clock_gating,
-	.disable_clock_gating = gaudi_disable_clock_gating,
 	.debug_coresight = gaudi_debug_coresight,
 	.is_device_idle = gaudi_is_device_idle,
 	.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index caa858bf6edc..fdadcbf3ef80 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -312,8 +312,6 @@ struct gaudi_internal_qman_info {
  * struct gaudi_device - ASIC specific manage structure.
  * @cpucp_info_get: get information on device from CPU-CP
  * @hw_queues_lock: protects the H/W queues from concurrent access.
- * @clk_gate_mutex: protects code areas that require clock gating to be disabled
- *                  temporarily
  * @internal_qmans: Internal QMANs information. The array size is larger than
  *                  the actual number of internal queues because they are not in
  *                  consecutive order.
@@ -336,7 +334,6 @@ struct gaudi_device {
 
 	/* TODO: remove hw_queues_lock after moving to scheduler code */
 	spinlock_t			hw_queues_lock;
-	struct mutex			clk_gate_mutex;
 
 	struct gaudi_internal_qman_info	internal_qmans[GAUDI_QUEUE_ID_SIZE];
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index fbcc7bbf44b3..954ef4d7bbf7 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5391,16 +5391,6 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static void goya_set_clock_gating(struct hl_device *hdev)
-{
-	/* clock gating not supported in Goya */
-}
-
-static void goya_disable_clock_gating(struct hl_device *hdev)
-{
-	/* clock gating not supported in Goya */
-}
-
 static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 					u8 mask_len, struct seq_file *s)
 {
@@ -5734,8 +5724,6 @@ static const struct hl_asic_funcs goya_funcs = {
 	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
 	.send_heartbeat = goya_send_heartbeat,
-	.set_clock_gating = goya_set_clock_gating,
-	.disable_clock_gating = goya_disable_clock_gating,
 	.debug_coresight = goya_debug_coresight,
 	.is_device_idle = goya_is_device_idle,
 	.non_hard_reset_late_init = goya_non_hard_reset_late_init,

From 2bf338f2ac69e3d41a6d45de1d9ac0d3afe109e6 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sat, 22 Jan 2022 20:14:24 +0200
Subject: [PATCH 05/50] habanalabs: make some MMU functions common

Some MMU functions can be used by different versions of our MMUs, so
move them to be common.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  9 ++++
 drivers/misc/habanalabs/common/mmu/mmu.c    | 55 +++++++++++++++++++++
 drivers/misc/habanalabs/common/mmu/mmu_v1.c | 45 +++--------------
 3 files changed, 72 insertions(+), 37 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 24962807e2cf..e96f26e7e804 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -402,8 +402,11 @@ enum hl_device_hw_state {
  * @hop4_mask: mask to get the PTE address in hop 4.
  * @hop5_mask: mask to get the PTE address in hop 5.
  * @last_mask: mask to get the bit indicating this is the last hop.
+ * @pgt_size: size for page tables.
  * @page_size: default page size used to allocate memory.
  * @num_hops: The amount of hops supported by the translation table.
+ * @hop_table_size: HOP table size.
+ * @hop0_tables_total_size: total size for all HOP0 tables.
  * @host_resident: Should the MMU page table reside in host memory or in the
  *                 device DRAM.
  */
@@ -423,8 +426,11 @@ struct hl_mmu_properties {
 	u64	hop4_mask;
 	u64	hop5_mask;
 	u64	last_mask;
+	u64	pgt_size;
 	u32	page_size;
 	u32	num_hops;
+	u32	hop_table_size;
+	u32	hop0_tables_total_size;
 	u8	host_resident;
 };
 
@@ -3015,6 +3021,9 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
 int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
 int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 					u32 flags, u32 asid, u64 va, u64 size);
+u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
+u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+					u8 hop_idx, u64 hop_addr, u64 virt_addr);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 int hl_mmu_if_set_funcs(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 9153a1f55175..810b73421ce1 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -662,3 +662,58 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 	return rc;
 }
 
+u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
+{
+	return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
+}
+
+/**
+ * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
+ * @ctx: pointer to the context structure.
+ * @hop_idx: HOP index.
+ * @hop_addr: HOP address.
+ * @virt_addr: virtual address for the translation.
+ *
+ * @return the matching PTE physical address on success, otherwise U64_MAX.
+ */
+u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+					u8 hop_idx, u64 hop_addr, u64 virt_addr)
+{
+	u64 mask, shift;
+
+	if (hop_idx >= mmu_prop->num_hops) {
+		dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
+		return U64_MAX;
+	}
+
+	/* currently max number of HOPs is 6 */
+	switch (hop_idx) {
+	case 0:
+		mask = mmu_prop->hop0_mask;
+		shift = mmu_prop->hop0_shift;
+		break;
+	case 1:
+		mask = mmu_prop->hop1_mask;
+		shift = mmu_prop->hop1_shift;
+		break;
+	case 2:
+		mask = mmu_prop->hop2_mask;
+		shift = mmu_prop->hop2_shift;
+		break;
+	case 3:
+		mask = mmu_prop->hop3_mask;
+		shift = mmu_prop->hop3_shift;
+		break;
+	case 4:
+		mask = mmu_prop->hop4_mask;
+		shift = mmu_prop->hop4_shift;
+		break;
+	default:
+		mask = mmu_prop->hop5_mask;
+		shift = mmu_prop->hop5_shift;
+		break;
+	}
+
+	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
+}
+
diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 3cadef97817d..d03786d0c407 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -217,18 +217,10 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
 					mmu_prop->hop4_shift);
 }
 
-static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
-{
-	if (curr_pte & PAGE_PRESENT_MASK)
-		return curr_pte & HOP_PHYS_ADDR_MASK;
-	else
-		return ULLONG_MAX;
-}
-
 static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
 						bool *is_new_hop)
 {
-	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
+	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 
 	if (hop_addr == ULLONG_MAX) {
 		hop_addr = alloc_hop(ctx);
@@ -546,7 +538,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 
 	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 
-	hop1_addr = get_next_hop_addr(ctx, curr_pte);
+	hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 
 	if (hop1_addr == ULLONG_MAX)
 		goto not_mapped;
@@ -555,7 +547,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 
 	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 
-	hop2_addr = get_next_hop_addr(ctx, curr_pte);
+	hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 
 	if (hop2_addr == ULLONG_MAX)
 		goto not_mapped;
@@ -564,7 +556,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 
 	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 
-	hop3_addr = get_next_hop_addr(ctx, curr_pte);
+	hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 
 	if (hop3_addr == ULLONG_MAX)
 		goto not_mapped;
@@ -582,7 +574,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 	}
 
 	if (!is_huge) {
-		hop4_addr = get_next_hop_addr(ctx, curr_pte);
+		hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 
 		if (hop4_addr == ULLONG_MAX)
 			goto not_mapped;
@@ -845,27 +837,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
 
 }
 
-static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
-				struct hl_mmu_properties *mmu_prop,
-				int hop_num, u64 hop_addr, u64 virt_addr)
-{
-	switch (hop_num) {
-	case 0:
-		return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 1:
-		return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 2:
-		return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 3:
-		return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 4:
-		return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	default:
-		break;
-	}
-	return U64_MAX;
-}
-
 static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 				struct hl_mmu_hop_info *hops)
 {
@@ -906,7 +877,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
 	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
 	hops->hop_info[0].hop_pte_addr =
-			get_hop_pte_addr(ctx, mmu_prop, 0,
+			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
 					hops->hop_info[0].hop_addr, virt_addr);
 	hops->hop_info[0].hop_pte_val =
 			hdev->asic_funcs->read_pte(hdev,
@@ -914,13 +885,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
 	for (i = 1 ; i < used_hops ; i++) {
 		hops->hop_info[i].hop_addr =
-			get_next_hop_addr(ctx,
+			hl_mmu_get_next_hop_addr(ctx,
 					hops->hop_info[i - 1].hop_pte_val);
 		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
 			return -EFAULT;
 
 		hops->hop_info[i].hop_pte_addr =
-				get_hop_pte_addr(ctx, mmu_prop, i,
+				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
 						hops->hop_info[i].hop_addr,
 						virt_addr);
 		hops->hop_info[i].hop_pte_val =

From bfbe9cbedd41402e3be3c5189a6b82933a5cd76d Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 13:23:54 +0200
Subject: [PATCH 06/50] habanalabs: sysfs functions should be in sysfs.c

Move common sysfs store/show functions to sysfs.c file for
consistency.

This is part of a patch-set to remove hwmgr.c

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  3 +-
 drivers/misc/habanalabs/common/hwmgr.c      | 72 ---------------------
 drivers/misc/habanalabs/common/sysfs.c      | 70 +++++++++++++++++++-
 drivers/misc/habanalabs/gaudi/gaudi.c       |  2 +-
 4 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e96f26e7e804..571998899253 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3125,8 +3125,7 @@ int hl_get_power(struct hl_device *hdev,
 int hl_get_clk_rate(struct hl_device *hdev,
 			u32 *cur_clk, u32 *max_clk);
 void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
-void hl_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp);
+void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
 void hl_encaps_handle_do_release(struct kref *ref);
diff --git a/drivers/misc/habanalabs/common/hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c
index 5451019f143f..f0e5417560c3 100644
--- a/drivers/misc/habanalabs/common/hwmgr.c
+++ b/drivers/misc/habanalabs/common/hwmgr.c
@@ -43,75 +43,3 @@ int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	return 0;
 }
-
-static ssize_t clk_max_freq_mhz_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
-
-	hdev->asic_prop.max_freq_value = value;
-
-	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
-}
-
-static ssize_t clk_max_freq_mhz_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	int rc;
-	u64 value;
-
-	if (!hl_device_operational(hdev, NULL)) {
-		count = -ENODEV;
-		goto fail;
-	}
-
-	rc = kstrtoull(buf, 0, &value);
-	if (rc) {
-		count = -EINVAL;
-		goto fail;
-	}
-
-	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
-
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
-			hdev->asic_prop.max_freq_value);
-
-fail:
-	return count;
-}
-
-static ssize_t clk_cur_freq_mhz_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
-
-	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
-}
-
-static DEVICE_ATTR_RW(clk_max_freq_mhz);
-static DEVICE_ATTR_RO(clk_cur_freq_mhz);
-
-static struct attribute *hl_dev_attrs[] = {
-	&dev_attr_clk_max_freq_mhz.attr,
-	&dev_attr_clk_cur_freq_mhz.attr,
-	NULL,
-};
-
-void hl_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp)
-{
-	dev_attr_grp->attrs = hl_dev_attrs;
-}
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 45c715325e2a..d66cf43788d8 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -109,6 +109,69 @@ void hl_set_max_power(struct hl_device *hdev)
 		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
 }
 
+static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	long value;
+
+	if (!hl_device_operational(hdev, NULL))
+		return -ENODEV;
+
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+
+	hdev->asic_prop.max_freq_value = value;
+
+	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
+}
+
+static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	int rc;
+	u64 value;
+
+	if (!hl_device_operational(hdev, NULL)) {
+		count = -ENODEV;
+		goto fail;
+	}
+
+	rc = kstrtoull(buf, 0, &value);
+	if (rc) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
+
+	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
+
+fail:
+	return count;
+}
+
+static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	long value;
+
+	if (!hl_device_operational(hdev, NULL))
+		return -ENODEV;
+
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+
+	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
+}
+
+static DEVICE_ATTR_RW(clk_max_freq_mhz);
+static DEVICE_ATTR_RO(clk_cur_freq_mhz);
+
+static struct attribute *hl_dev_clk_attrs[] = {
+	&dev_attr_clk_max_freq_mhz.attr,
+	&dev_attr_clk_cur_freq_mhz.attr,
+	NULL,
+};
+
 static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
@@ -463,6 +526,11 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = {
 	NULL,
 };
 
+void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp)
+{
+	dev_attr_grp->attrs = hl_dev_clk_attrs;
+}
+
 int hl_sysfs_init(struct hl_device *hdev)
 {
 	int rc;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 16637d629e67..f096cfc03bf3 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9361,7 +9361,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.debugfs_read64 = gaudi_debugfs_read64,
 	.debugfs_write64 = gaudi_debugfs_write64,
 	.debugfs_read_dma = gaudi_debugfs_read_dma,
-	.add_device_attr = hl_add_device_attr,
+	.add_device_attr = hl_sysfs_add_dev_clk_attr,
 	.handle_eqe = gaudi_handle_eqe,
 	.set_pll_profile = hl_set_pll_profile,
 	.get_events_stat = gaudi_get_events_stat,

From 9e2884ce989aa3c6e5a6c69b83664376cd2d98fd Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 21:40:06 +0200
Subject: [PATCH 07/50] habanalabs: get clk is common function

Retrieving the clock from the f/w is done in exactly the same way in
ALL our ASICs. Therefore, there is no real justification for doing it
in an ASIC-specific function.

The only additional requirement is to check whether we are running on
the simulator, which doesn't require an ASIC-specific callback.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h       |  7 ++-----
 drivers/misc/habanalabs/common/habanalabs_ioctl.c |  9 ++++-----
 drivers/misc/habanalabs/common/hwmgr.c            | 13 ++++++++-----
 drivers/misc/habanalabs/gaudi/gaudi.c             |  1 -
 drivers/misc/habanalabs/goya/goya.c               |  3 +--
 5 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 571998899253..ecfdfec7abdc 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -1190,7 +1190,6 @@ struct fw_load_mgr {
  * @halt_coresight: stop the ETF and ETR traces.
  * @ctx_init: context dependent initialization.
  * @ctx_fini: context dependent cleanup.
- * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
  * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
  * @load_firmware_to_device: load the firmware to the device's memory
  * @load_boot_fit_to_device: load boot fit to device's memory
@@ -1321,7 +1320,6 @@ struct hl_asic_funcs {
 	void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
 	int (*ctx_init)(struct hl_ctx *ctx);
 	void (*ctx_fini)(struct hl_ctx *ctx);
-	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 	int (*load_firmware_to_device)(struct hl_device *hdev);
 	int (*load_boot_fit_to_device)(struct hl_device *hdev);
@@ -3122,8 +3120,7 @@ int hl_set_power(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_get_power(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value);
-int hl_get_clk_rate(struct hl_device *hdev,
-			u32 *cur_clk, u32 *max_clk);
+int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
 void hw_sob_get(struct hl_hw_sob *hw_sob);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 3ba3a8ffda3e..ed516f911ee2 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -251,13 +251,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
-						&clk_rate.max_clk_rate_mhz);
+	rc = hl_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
 	if (rc)
 		return rc;
 
-	return copy_to_user(out, &clk_rate,
-		min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
+	return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate)))
+										? -EFAULT : 0;
 }
 
 static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
diff --git a/drivers/misc/habanalabs/common/hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c
index f0e5417560c3..e96126b46056 100644
--- a/drivers/misc/habanalabs/common/hwmgr.c
+++ b/drivers/misc/habanalabs/common/hwmgr.c
@@ -20,11 +20,16 @@ int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
+	if (!hdev->pdev) {
+		*cur_clk = 0;
+		*max_clk = 0;
+		return 0;
+	}
+
 	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
 
 	if (value < 0) {
-		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
-			value);
+		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
 		return value;
 	}
 
@@ -33,9 +38,7 @@ int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
 
 	if (value < 0) {
-		dev_err(hdev->dev,
-			"Failed to retrieve device current clock %ld\n",
-			value);
+		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
 		return value;
 	}
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f096cfc03bf3..744d4305db40 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9385,7 +9385,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.halt_coresight = gaudi_halt_coresight,
 	.ctx_init = gaudi_ctx_init,
 	.ctx_fini = gaudi_ctx_fini,
-	.get_clk_rate = hl_get_clk_rate,
 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
 	.load_firmware_to_device = gaudi_load_firmware_to_device,
 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 954ef4d7bbf7..318b97c53ed7 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -5739,7 +5739,6 @@ static const struct hl_asic_funcs goya_funcs = {
 	.halt_coresight = goya_halt_coresight,
 	.ctx_init = goya_ctx_init,
 	.ctx_fini = goya_ctx_fini,
-	.get_clk_rate = hl_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.load_firmware_to_device = goya_load_firmware_to_device,
 	.load_boot_fit_to_device = goya_load_boot_fit_to_device,

From 8d96430784fc810e6adfa61bf6bf56cdcdeb7a69 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 21:57:57 +0200
Subject: [PATCH 08/50] habanalabs: remove hwmgr.c

The two remaining functions in this file belong to firmware_if.c,
as they communicate with the firmware.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/Makefile       |  2 +-
 drivers/misc/habanalabs/common/firmware_if.c  | 42 +++++++++++++++-
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +-
 .../misc/habanalabs/common/habanalabs_ioctl.c |  2 +-
 drivers/misc/habanalabs/common/hwmgr.c        | 48 -------------------
 drivers/misc/habanalabs/gaudi/gaudi.c         |  2 +-
 6 files changed, 46 insertions(+), 54 deletions(-)
 delete mode 100644 drivers/misc/habanalabs/common/hwmgr.c

diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index 82c3824cad00..6ebe3c7001ff 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
 		common/command_submission.o common/firmware_if.o \
-		common/state_dump.o common/hwmgr.o
+		common/state_dump.o
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 6775c5c3166b..f7ccbd288510 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -2682,3 +2682,43 @@ int hl_fw_init_cpu(struct hl_device *hdev)
 			hl_fw_dynamic_init_cpu(hdev, fw_loader) :
 			hl_fw_static_init_cpu(hdev, fw_loader);
 }
+
+void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
+{
+	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+				hdev->asic_prop.max_freq_value);
+}
+
+int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+{
+	long value;
+
+	if (!hl_device_operational(hdev, NULL))
+		return -ENODEV;
+
+	if (!hdev->pdev) {
+		*cur_clk = 0;
+		*max_clk = 0;
+		return 0;
+	}
+
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+
+	if (value < 0) {
+		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
+		return value;
+	}
+
+	*max_clk = (value / 1000 / 1000);
+
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+
+	if (value < 0) {
+		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
+		return value;
+	}
+
+	*cur_clk = (value / 1000 / 1000);
+
+	return 0;
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index ecfdfec7abdc..c782d44d58d5 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3120,8 +3120,8 @@ int hl_set_power(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_get_power(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value);
-int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
-void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
+int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index ed516f911ee2..d19097576b05 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -251,7 +251,7 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	rc = hl_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
+	rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/common/hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c
deleted file mode 100644
index e96126b46056..000000000000
--- a/drivers/misc/habanalabs/common/hwmgr.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2019-2021 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
-{
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
-			hdev->asic_prop.max_freq_value);
-}
-
-int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
-{
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	if (!hdev->pdev) {
-		*cur_clk = 0;
-		*max_clk = 0;
-		return 0;
-	}
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
-
-	if (value < 0) {
-		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
-		return value;
-	}
-
-	*max_clk = (value / 1000 / 1000);
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
-
-	if (value < 0) {
-		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
-		return value;
-	}
-
-	*cur_clk = (value / 1000 / 1000);
-
-	return 0;
-}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 744d4305db40..ae86147dfde4 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9363,7 +9363,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.debugfs_read_dma = gaudi_debugfs_read_dma,
 	.add_device_attr = hl_sysfs_add_dev_clk_attr,
 	.handle_eqe = gaudi_handle_eqe,
-	.set_pll_profile = hl_set_pll_profile,
+	.set_pll_profile = hl_fw_set_pll_profile,
 	.get_events_stat = gaudi_get_events_stat,
 	.read_pte = gaudi_read_pte,
 	.write_pte = gaudi_write_pte,

From 092a31c5266282838e98fffc5e80b50e53b7bec9 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 22:35:09 +0200
Subject: [PATCH 09/50] habanalabs: move more f/w functions to firmware_if.c

For better maintainability, try to concentrate all the common functions
that communicate with the f/w in firmware_if.c.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c      |   6 +-
 drivers/misc/habanalabs/common/firmware_if.c | 101 ++++++++++++++++-
 drivers/misc/habanalabs/common/habanalabs.h  |  44 +++-----
 drivers/misc/habanalabs/common/sysfs.c       | 110 +------------------
 drivers/misc/habanalabs/goya/goya_hwmgr.c    |  38 +++----
 5 files changed, 141 insertions(+), 158 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 9955e0d3502b..c98a0a81e7d5 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -1228,7 +1228,7 @@ kill_processes:
 			goto out_err;
 		}
 
-		hl_set_max_power(hdev);
+		hl_fw_set_max_power(hdev);
 	} else {
 		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
 		if (rc) {
@@ -1534,7 +1534,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	/* Need to call this again because the max power might change,
 	 * depending on card type for certain ASICs
 	 */
-	hl_set_max_power(hdev);
+	hl_fw_set_max_power(hdev);
 
 	/*
 	 * hl_hwmon_init() must be called after device_late_init(), because only
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f7ccbd288510..508ee349f4e4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2685,7 +2685,7 @@ int hl_fw_init_cpu(struct hl_device *hdev)
 
 void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 {
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
 				hdev->asic_prop.max_freq_value);
 }
 
@@ -2702,7 +2702,7 @@ int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 		return 0;
 	}
 
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
@@ -2711,7 +2711,7 @@ int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
@@ -2722,3 +2722,98 @@ int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	return 0;
 }
+
+long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+{
+	struct cpucp_packet pkt;
+	u32 used_pll_idx;
+	u64 result;
+	int rc;
+
+	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+	if (rc)
+		return rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	if (curr)
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
+	else
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
+
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
+			used_pll_idx, rc);
+		return rc;
+	}
+
+	return (long) result;
+}
+
+void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+{
+	struct cpucp_packet pkt;
+	u32 used_pll_idx;
+	int rc;
+
+	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+	if (rc)
+		return;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
+	pkt.value = cpu_to_le64(freq);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+
+	if (rc)
+		dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
+			used_pll_idx, rc);
+}
+
+u64 hl_fw_get_max_power(struct hl_device *hdev)
+{
+	struct cpucp_packet pkt;
+	u64 result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
+
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
+		return (u64) rc;
+	}
+
+	return result;
+}
+
+void hl_fw_set_max_power(struct hl_device *hdev)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	/* TODO: remove this after simulator supports this packet */
+	if (!hdev->pdev)
+		return;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.value = cpu_to_le64(hdev->max_power);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+
+	if (rc)
+		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c782d44d58d5..2bf73e5ce47f 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3092,37 +3092,25 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
-								bool curr);
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
-								u64 freq);
-int hl_get_temperature(struct hl_device *hdev,
-		       int sensor_index, u32 attr, long *value);
-int hl_set_temperature(struct hl_device *hdev,
-		       int sensor_index, u32 attr, long value);
-int hl_get_voltage(struct hl_device *hdev,
-		   int sensor_index, u32 attr, long *value);
-int hl_get_current(struct hl_device *hdev,
-		   int sensor_index, u32 attr, long *value);
-int hl_get_fan_speed(struct hl_device *hdev,
-		     int sensor_index, u32 attr, long *value);
-int hl_get_pwm_info(struct hl_device *hdev,
-		    int sensor_index, u32 attr, long *value);
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
-			long value);
-u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev);
-int hl_set_voltage(struct hl_device *hdev,
-			int sensor_index, u32 attr, long value);
-int hl_set_current(struct hl_device *hdev,
-			int sensor_index, u32 attr, long value);
-int hl_set_power(struct hl_device *hdev,
-			int sensor_index, u32 attr, long value);
-int hl_get_power(struct hl_device *hdev,
-			int sensor_index, u32 attr, long *value);
+long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
+void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+u64 hl_fw_get_max_power(struct hl_device *hdev);
+void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
 int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
+
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
 void hl_encaps_handle_do_release(struct kref *ref);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index d66cf43788d8..ce3538d36bb7 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,106 +9,6 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
-{
-	struct cpucp_packet pkt;
-	u32 used_pll_idx;
-	u64 result;
-	int rc;
-
-	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
-	if (rc)
-		return rc;
-
-	memset(&pkt, 0, sizeof(pkt));
-
-	if (curr)
-		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
-						CPUCP_PKT_CTL_OPCODE_SHIFT);
-	else
-		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
-						CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
-
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, &result);
-
-	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to get frequency of PLL %d, error %d\n",
-			used_pll_idx, rc);
-		return rc;
-	}
-
-	return (long) result;
-}
-
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
-{
-	struct cpucp_packet pkt;
-	u32 used_pll_idx;
-	int rc;
-
-	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
-	if (rc)
-		return;
-
-	memset(&pkt, 0, sizeof(pkt));
-
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
-					CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
-	pkt.value = cpu_to_le64(freq);
-
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, NULL);
-
-	if (rc)
-		dev_err(hdev->dev,
-			"Failed to set frequency to PLL %d, error %d\n",
-			used_pll_idx, rc);
-}
-
-u64 hl_get_max_power(struct hl_device *hdev)
-{
-	struct cpucp_packet pkt;
-	u64 result;
-	int rc;
-
-	memset(&pkt, 0, sizeof(pkt));
-
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
-				CPUCP_PKT_CTL_OPCODE_SHIFT);
-
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, &result);
-
-	if (rc) {
-		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
-		return (u64) rc;
-	}
-
-	return result;
-}
-
-void hl_set_max_power(struct hl_device *hdev)
-{
-	struct cpucp_packet pkt;
-	int rc;
-
-	memset(&pkt, 0, sizeof(pkt));
-
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
-				CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.value = cpu_to_le64(hdev->max_power);
-
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, NULL);
-
-	if (rc)
-		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
-}
-
 static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
@@ -117,7 +17,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
 
 	hdev->asic_prop.max_freq_value = value;
 
@@ -144,7 +44,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribut
 
 	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
 
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
+	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
 
 fail:
 	return count;
@@ -158,7 +58,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
@@ -386,7 +286,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	val = hl_get_max_power(hdev);
+	val = hl_fw_get_max_power(hdev);
 
 	return sprintf(buf, "%lu\n", val);
 }
@@ -411,7 +311,7 @@ static ssize_t max_power_store(struct device *dev,
 	}
 
 	hdev->max_power = value;
-	hl_set_max_power(hdev);
+	hl_fw_set_max_power(hdev);
 
 out:
 	return count;
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 76b47749affe..f9f3afe94056 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -13,19 +13,19 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 
 	switch (freq) {
 	case PLL_HIGH:
-		hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
+		hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
+		hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
+		hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
 		break;
 	case PLL_LOW:
-		hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
+		hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
 		break;
 	case PLL_LAST:
-		hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
-		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
-		hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
+		hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
+		hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
+		hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
 		break;
 	default:
 		dev_err(hdev->dev, "unknown frequency setting\n");
@@ -41,7 +41,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -74,7 +74,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
+	hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value);
 	goya->mme_clk = value;
 
 fail:
@@ -90,7 +90,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -123,7 +123,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
+	hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
 	goya->tpc_clk = value;
 
 fail:
@@ -139,7 +139,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -172,7 +172,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
+	hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value);
 	goya->ic_clk = value;
 
 fail:
@@ -188,7 +188,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -205,7 +205,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -222,7 +222,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
+	value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true);
 
 	if (value < 0)
 		return value;

From 7ae439a0614f3718df203d9f20eac7b3161867f4 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 22:49:12 +0200
Subject: [PATCH 10/50] habanalabs: remove asic callback set_pll_profile()

Setting the PLL profile is the same for all ASICs, except for GOYA.
However, because this function is never called from common code, there
is no need to have an ASIC-specific callback function.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 2 +-
 drivers/misc/habanalabs/common/habanalabs.h  | 5 +----
 drivers/misc/habanalabs/gaudi/gaudi.c        | 3 +--
 drivers/misc/habanalabs/goya/goya.c          | 3 +--
 drivers/misc/habanalabs/goya/goya_hwmgr.c    | 3 +++
 5 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 508ee349f4e4..93e9045461ae 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2683,7 +2683,7 @@ int hl_fw_init_cpu(struct hl_device *hdev)
 			hl_fw_static_init_cpu(hdev, fw_loader);
 }
 
-void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
+void hl_fw_set_pll_profile(struct hl_device *hdev)
 {
 	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
 				hdev->asic_prop.max_freq_value);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 2bf73e5ce47f..3dc155948730 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1161,7 +1161,6 @@ struct fw_load_mgr {
  *                    internal memory via DMA engine.
  * @add_device_attr: add ASIC specific device attributes.
  * @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
- * @set_pll_profile: change PLL profile (manual/automatic).
  * @get_events_stat: retrieve event queue entries histogram.
  * @read_pte: read MMU page table entry from DRAM.
  * @write_pte: write MMU page table entry to DRAM.
@@ -1291,8 +1290,6 @@ struct hl_asic_funcs {
 				struct attribute_group *dev_attr_grp);
 	void (*handle_eqe)(struct hl_device *hdev,
 				struct hl_eq_entry *eq_entry);
-	void (*set_pll_profile)(struct hl_device *hdev,
-			enum hl_pll_frequency freq);
 	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
 				u32 *size);
 	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
@@ -3108,7 +3105,7 @@ int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long valu
 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
 int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
-void hl_fw_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
+void hl_fw_set_pll_profile(struct hl_device *hdev);
 void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
 
 void hw_sob_get(struct hl_hw_sob *hw_sob);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index ae86147dfde4..bb0b4ff3451d 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1635,7 +1635,7 @@ static int gaudi_late_init(struct hl_device *hdev)
 	 */
 	gaudi_mmu_prepare(hdev, 1);
 
-	hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
+	hl_fw_set_pll_profile(hdev);
 
 	return 0;
 
@@ -9363,7 +9363,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.debugfs_read_dma = gaudi_debugfs_read_dma,
 	.add_device_attr = hl_sysfs_add_dev_clk_attr,
 	.handle_eqe = gaudi_handle_eqe,
-	.set_pll_profile = hl_fw_set_pll_profile,
 	.get_events_stat = gaudi_get_events_stat,
 	.read_pte = gaudi_read_pte,
 	.write_pte = gaudi_write_pte,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 318b97c53ed7..916a718dddac 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -893,7 +893,7 @@ int goya_late_init(struct hl_device *hdev)
 
 	goya->pm_mng_profile = PM_AUTO;
 
-	hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+	goya_set_pll_profile(hdev, PLL_LOW);
 
 	schedule_delayed_work(&goya->goya_work->work_freq,
 		usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
@@ -5717,7 +5717,6 @@ static const struct hl_asic_funcs goya_funcs = {
 	.debugfs_read_dma = goya_debugfs_read_dma,
 	.add_device_attr = goya_add_device_attr,
 	.handle_eqe = goya_handle_eqe,
-	.set_pll_profile = goya_set_pll_profile,
 	.get_events_stat = goya_get_events_stat,
 	.read_pte = goya_read_pte,
 	.write_pte = goya_write_pte,
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index f9f3afe94056..7d4f26c67d8e 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -11,6 +11,9 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
+	if (!hdev->pdev)
+		return;
+
 	switch (freq) {
 	case PLL_HIGH:
 		hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);

From be028a3648f3791ce8bc74608477d7846be72c24 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 22:55:39 +0200
Subject: [PATCH 11/50] habanalabs: rename dev_attr_grp to dev_clk_attr_grp

In this attribute group we are only adding clocks. This is in
preparation for adding a device specific attribute group which is
not related to clocks.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 5 ++---
 drivers/misc/habanalabs/common/sysfs.c      | 4 ++--
 drivers/misc/habanalabs/goya/goyaP.h        | 5 ++---
 drivers/misc/habanalabs/goya/goya_hwmgr.c   | 7 +++----
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 3dc155948730..c116e30a5543 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1286,8 +1286,7 @@ struct hl_asic_funcs {
 				bool user_address, u64 val);
 	int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
 				void *blob_addr);
-	void (*add_device_attr)(struct hl_device *hdev,
-				struct attribute_group *dev_attr_grp);
+	void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
 	void (*handle_eqe)(struct hl_device *hdev,
 				struct hl_eq_entry *eq_entry);
 	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
@@ -3106,7 +3105,7 @@ int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value)
 int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
 int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 void hl_fw_set_pll_profile(struct hl_device *hdev);
-void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp);
+void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
 
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index ce3538d36bb7..606cee6252fe 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -426,9 +426,9 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = {
 	NULL,
 };
 
-void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp)
+void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
 {
-	dev_attr_grp->attrs = hl_dev_clk_attrs;
+	dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
 }
 
 int hl_sysfs_init(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 3740fd25bf84..d71e2f4de757 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -217,8 +217,7 @@ u64 goya_get_max_power(struct hl_device *hdev);
 void goya_set_max_power(struct hl_device *hdev, u64 value);
 
 void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
-void goya_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp);
+void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
 int goya_cpucp_info_get(struct hl_device *hdev);
 int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
 void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 7d4f26c67d8e..c641ff1e23b9 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -350,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile);
 static DEVICE_ATTR_RW(tpc_clk);
 static DEVICE_ATTR_RO(tpc_clk_curr);
 
-static struct attribute *goya_dev_attrs[] = {
+static struct attribute *goya_clk_dev_attrs[] = {
 	&dev_attr_high_pll.attr,
 	&dev_attr_ic_clk.attr,
 	&dev_attr_ic_clk_curr.attr,
@@ -362,8 +362,7 @@ static struct attribute *goya_dev_attrs[] = {
 	NULL,
 };
 
-void goya_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp)
+void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
 {
-	dev_attr_grp->attrs = goya_dev_attrs;
+	dev_clk_attr_grp->attrs = goya_clk_dev_attrs;
 }

From b62ff1a4123693551383f306ab736758e6fe0d84 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 8 Jan 2022 23:45:32 +0200
Subject: [PATCH 12/50] habanalabs: add vrm version to sysfs

The Infineon version is only applicable to GOYA and GAUDI. For later
ASICs, we display the Voltage Regulator Monitor f/w version.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/sysfs-driver-habanalabs       | 10 +++-
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +-
 drivers/misc/habanalabs/common/sysfs.c        | 46 +++++++++++--------
 drivers/misc/habanalabs/gaudi/gaudi.c         | 25 +++++++++-
 drivers/misc/habanalabs/gaudi/gaudiP.h        |  4 +-
 drivers/misc/habanalabs/goya/goyaP.h          |  3 +-
 drivers/misc/habanalabs/goya/goya_hwmgr.c     | 21 ++++++++-
 7 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 1f127f71d2b4..c16a573eb680 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -115,7 +115,7 @@ What:           /sys/class/habanalabs/hl<n>/infineon_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Version of the Device's power supply F/W code
+Description:    Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
 
 What:           /sys/class/habanalabs/hl<n>/max_power
 Date:           Jan 2019
@@ -220,4 +220,10 @@ What:           /sys/class/habanalabs/hl<n>/uboot_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Version of the u-boot running on the device's CPU
\ No newline at end of file
+Description:    Version of the u-boot running on the device's CPU
+
+What:           /sys/class/habanalabs/hl<n>/vrm_ver
+Date:           Jan 2022
+KernelVersion:  not yet upstreamed
+Contact:        ogabbay@kernel.org
+Description:    Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c116e30a5543..b8acfef9577a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1286,7 +1286,8 @@ struct hl_asic_funcs {
 				bool user_address, u64 val);
 	int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
 				void *blob_addr);
-	void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
+	void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
+				struct attribute_group *dev_vrm_attr_grp);
 	void (*handle_eqe)(struct hl_device *hdev,
 				struct hl_eq_entry *eq_entry);
 	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
@@ -3106,6 +3107,7 @@ int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value
 int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 void hl_fw_set_pll_profile(struct hl_device *hdev);
 void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
+void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp);
 
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 606cee6252fe..65b132fa6dbd 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -69,7 +69,30 @@ static DEVICE_ATTR_RO(clk_cur_freq_mhz);
 static struct attribute *hl_dev_clk_attrs[] = {
 	&dev_attr_clk_max_freq_mhz.attr,
 	&dev_attr_clk_cur_freq_mhz.attr,
 	NULL,
+};
+
+/* sysfs show: print infineon_version and, when non-zero, the second stage version */
+static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	struct cpucp_info *cpucp_info;
+
+	cpucp_info = &hdev->asic_prop.cpucp_info;
+
+	if (cpucp_info->infineon_second_stage_version)
+		return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
+				le32_to_cpu(cpucp_info->infineon_second_stage_version));
+	else
+		return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
+}
+
+static DEVICE_ATTR_RO(vrm_ver);
+
+/* Must stay NULL-terminated: it is installed as the attrs array of an attribute_group */
+static struct attribute *hl_dev_vrm_attrs[] = {
+	&dev_attr_vrm_ver.attr,
+	NULL,
 };
 
 static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
@@ -121,20 +140,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }
 
-static ssize_t infineon_ver_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-
-	if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
-		return sprintf(buf, "%#04x %#04x\n",
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
-	else
-		return sprintf(buf, "%#04x\n",
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
-}
-
 static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
@@ -357,7 +362,6 @@ static DEVICE_ATTR_RO(device_type);
 static DEVICE_ATTR_RO(fuse_ver);
 static DEVICE_ATTR_WO(hard_reset);
 static DEVICE_ATTR_RO(hard_reset_cnt);
-static DEVICE_ATTR_RO(infineon_ver);
 static DEVICE_ATTR_RW(max_power);
 static DEVICE_ATTR_RO(pci_addr);
 static DEVICE_ATTR_RO(preboot_btl_ver);
@@ -383,7 +387,6 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_fuse_ver.attr,
 	&dev_attr_hard_reset.attr,
 	&dev_attr_hard_reset_cnt.attr,
-	&dev_attr_infineon_ver.attr,
 	&dev_attr_max_power.attr,
 	&dev_attr_pci_addr.attr,
 	&dev_attr_preboot_btl_ver.attr,
@@ -404,10 +407,12 @@ static struct attribute_group hl_dev_attr_group = {
 };
 
 static struct attribute_group hl_dev_clks_attr_group;
+static struct attribute_group hl_dev_vrm_attr_group;
 
 static const struct attribute_group *hl_dev_attr_groups[] = {
 	&hl_dev_attr_group,
 	&hl_dev_clks_attr_group,
+	&hl_dev_vrm_attr_group,
 	NULL,
 };
 
@@ -431,13 +436,18 @@ void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *d
 	dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
 }
 
+void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp)
+{
+	dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs;
+}
+
 int hl_sysfs_init(struct hl_device *hdev)
 {
 	int rc;
 
 	hdev->max_power = hdev->asic_prop.max_power_default;
 
-	hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
+	hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group);
 
 	rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
 	if (rc) {
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index bb0b4ff3451d..c4e41856ad31 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9324,6 +9324,32 @@ static u32 *gaudi_get_stream_master_qid_arr(void)
 	return gaudi_stream_master;
 }
 
+/* sysfs show: print the infineon_version field of cpucp_info as a hex value */
+static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	struct cpucp_info *cpucp_info;
+
+	cpucp_info = &hdev->asic_prop.cpucp_info;
+
+	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
+}
+
+static DEVICE_ATTR_RO(infineon_ver);
+
+/* Must stay NULL-terminated: it is installed as the attrs array of an attribute_group */
+static struct attribute *gaudi_vrm_dev_attrs[] = {
+	&dev_attr_infineon_ver.attr,
+	NULL,
+};
+
+static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
+					struct attribute_group *dev_vrm_attr_grp)
+{
+	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
+	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -9361,7 +9384,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.debugfs_read64 = gaudi_debugfs_read64,
 	.debugfs_write64 = gaudi_debugfs_write64,
 	.debugfs_read_dma = gaudi_debugfs_read_dma,
-	.add_device_attr = hl_sysfs_add_dev_clk_attr,
+	.add_device_attr = gaudi_add_device_attr,
 	.handle_eqe = gaudi_handle_eqe,
 	.get_events_stat = gaudi_get_events_stat,
 	.read_pte = gaudi_read_pte,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index fdadcbf3ef80..54de7c599072 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2019-2020 HabanaLabs, Ltd.
+ * Copyright 2019-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -351,8 +351,6 @@ struct gaudi_device {
 
 void gaudi_init_security(struct hl_device *hdev);
 void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
-void gaudi_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp);
 int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
 void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index d71e2f4de757..647f57402616 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -217,7 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev);
 void goya_set_max_power(struct hl_device *hdev, u64 value);
 
 void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
-void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
+void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
+				struct attribute_group *dev_vrm_attr_grp);
 int goya_cpucp_info_get(struct hl_device *hdev);
 int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
 void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index c641ff1e23b9..6580fc6a486a 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -359,10 +359,31 @@ static struct attribute *goya_clk_dev_attrs[] = {
 	&dev_attr_pm_mng_profile.attr,
 	&dev_attr_tpc_clk.attr,
 	&dev_attr_tpc_clk_curr.attr,
 	NULL,
 };
 
-void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
+/* sysfs show: print the infineon_version field of cpucp_info as a hex value */
+static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	struct cpucp_info *cpucp_info;
+
+	cpucp_info = &hdev->asic_prop.cpucp_info;
+
+	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
+}
+
+static DEVICE_ATTR_RO(infineon_ver);
+
+/* Must stay NULL-terminated: it is installed as the attrs array of an attribute_group */
+static struct attribute *goya_vrm_dev_attrs[] = {
+	&dev_attr_infineon_ver.attr,
+	NULL,
+};
+
+void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
+				struct attribute_group *dev_vrm_attr_grp)
 {
 	dev_clk_attr_grp->attrs = goya_clk_dev_attrs;
+	dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs;
 }

From 2491533808d71c2229582d7427dffbd9494cc85f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 10 Jan 2022 17:01:50 +0200
Subject: [PATCH 13/50] habanalabs: remove power9 workaround for dma support

We don't need this workaround anymore.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  3 ---
 drivers/misc/habanalabs/common/pci/pci.c    |  5 +----
 drivers/misc/habanalabs/gaudi/gaudi.c       |  9 +--------
 drivers/misc/habanalabs/goya/goya.c         | 11 +----------
 4 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index b8acfef9577a..6c7a60210416 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2604,8 +2604,6 @@ struct hl_reset_info {
  * @in_debug: whether the device is in a state where the profiling/tracing infrastructure
  *            can be used. This indication is needed because in some ASICs we need to do
  *            specific operations to enable that infrastructure.
- * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
- *                           only to POWER9 machines.
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
@@ -2728,7 +2726,6 @@ struct hl_device {
 	u8				device_cpu_disabled;
 	u8				dma_mask;
 	u8				in_debug;
-	u8				power9_64bit_dma_enable;
 	u8				cdev_sysfs_created;
 	u8				stop_on_err;
 	u8				supports_sync_stream;
diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index 0b5366cc84fd..a6ffa342070c 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -338,10 +338,7 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 				lower_32_bits(outbound_region_end_address));
 	rc |= hl_pci_iatu_write(hdev, 0x014, 0);
 
-	if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
-		rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
-	else
-		rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+	rc |= hl_pci_iatu_write(hdev, 0x018, 0);
 
 	rc |= hl_pci_iatu_write(hdev, 0x020,
 				upper_32_bits(outbound_region_end_address));
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index c4e41856ad31..66c90164be6b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9008,14 +9008,7 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 
 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
 {
-	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
-							HL_POWER9_HOST_MAGIC) {
-		hdev->power9_64bit_dma_enable = 1;
-		hdev->dma_mask = 64;
-	} else {
-		hdev->power9_64bit_dma_enable = 0;
-		hdev->dma_mask = 48;
-	}
+	hdev->dma_mask = 48;
 }
 
 static u64 gaudi_get_device_time(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 916a718dddac..1bd23578cb83 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5554,16 +5554,7 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
 
 static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
 {
-	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
-							HL_POWER9_HOST_MAGIC) {
-		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
-		hdev->power9_64bit_dma_enable = 1;
-		hdev->dma_mask = 64;
-	} else {
-		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
-		hdev->power9_64bit_dma_enable = 0;
-		hdev->dma_mask = 48;
-	}
+	hdev->dma_mask = 48;
 }
 
 u64 goya_get_device_time(struct hl_device *hdev)

From 6ba2c0ce26929bc7dcae88e3c3450bd75d9a2423 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 10 Jan 2022 21:58:42 +0200
Subject: [PATCH 14/50] habanalabs: use common wrapper for MMU cache
 invalidation

We have a common function that wraps the call to the MMU cache
invalidation function, which is ASIC-specific. The wrapper checks
the return value and prints error if necessary. For consistency, try
to use the wrapper when possible.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 6 +++---
 drivers/misc/habanalabs/goya/goya.c   | 5 ++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 66c90164be6b..c58e21e2db3b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3855,7 +3855,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 	/* mem cache invalidation */
 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
+	hl_mmu_invalidate_cache(hdev, true, 0);
 
 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -8595,7 +8595,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 			hdev->internal_cb_pool_dma_addr,
 			HOST_SPACE_INTERNAL_CB_SZ);
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
 	mutex_unlock(&ctx->mmu_lock);
 
 	if (rc)
@@ -8630,7 +8630,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 			HOST_SPACE_INTERNAL_CB_SZ);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
 	mutex_unlock(&ctx->mmu_lock);
 
 	gen_pool_destroy(hdev->internal_cb_pool);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 1bd23578cb83..d8c6dea7c809 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2700,8 +2700,7 @@ int goya_mmu_init(struct hl_device *hdev)
 	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
 			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
-					MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
+	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
 
 	WREG32(mmMMU_MMU_ENABLE, 1);
 	WREG32(mmMMU_SPI_MASK, 0xF);
@@ -5341,7 +5340,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
 	/* Treat as invalidate all because there is no range invalidation
 	 * in Goya
 	 */
-	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
+	return hl_mmu_invalidate_cache(hdev, is_hard, flags);
 }
 
 int goya_send_heartbeat(struct hl_device *hdev)

From 4c01e524b21ff4342d8ab16a32a0acf91db761e3 Mon Sep 17 00:00:00 2001
From: Rajaravi Krishna Katta <rkatta@habana.ai>
Date: Wed, 29 Dec 2021 13:13:45 +0200
Subject: [PATCH 15/50] habanalabs: sysfs support for fw os version

Add a new sysfs entry that displays the firmware OS version:
/sys/class/habanalabs/hl<n>/fw_os_ver

Signed-off-by: Rajaravi Krishna Katta <rkatta@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/sysfs-driver-habanalabs |  6 ++++++
 drivers/misc/habanalabs/common/sysfs.c            | 10 ++++++++++
 drivers/misc/habanalabs/include/common/cpucp_if.h |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index c16a573eb680..96646fb2e7a1 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -69,6 +69,12 @@ KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the device's version from the eFuse
 
+What:           /sys/class/habanalabs/hl<n>/fw_os_ver
+Date:           Dec 2021
+KernelVersion:  5.18
+Contact:        ogabbay@kernel.org
+Description:    Version of the firmware OS running on the device's CPU
+
 What:           /sys/class/habanalabs/hl<n>/hard_reset
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 65b132fa6dbd..3f220dd3b6b2 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -156,6 +156,14 @@ static ssize_t thermal_ver_show(struct device *dev,
 	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
 }
 
+static ssize_t fw_os_ver_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version);
+}
+
 static ssize_t preboot_btl_ver_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -370,6 +378,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt);
 static DEVICE_ATTR_RO(status);
 static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
+static DEVICE_ATTR_RO(fw_os_ver);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -393,6 +402,7 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_status.attr,
 	&dev_attr_thermal_ver.attr,
 	&dev_attr_uboot_ver.attr,
+	&dev_attr_fw_os_ver.attr,
 	NULL,
 };
 
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 737c39f33f05..f00db22f98fb 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -780,6 +780,7 @@ struct cpucp_security_info {
  *                     (0 = functional 1 = binned)
  * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
  *                     (0 = functional 1 = binned)
+ * @fw_os_version: Firmware OS Version
  */
 struct cpucp_info {
 	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -807,6 +808,7 @@ struct cpucp_info {
 	__le32 reserved6;
 	__u8 pll_map[PLL_MAP_LEN];
 	__le64 mme_binning_mask;
+	__u8 fw_os_version[VERSION_MAX_LEN];
 };
 
 struct cpucp_mac_addr {

From e24a62cb68d117858f311d14ca366a18a44120a8 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 13 Jan 2022 10:05:38 +0200
Subject: [PATCH 16/50] habanalabs: there is no kernel TDR in future ASICs

In future ASICs, there is no kernel TDR for new workloads that are
submitted directly from user-space to the device.

Therefore, the driver can NEVER know that a workload has timed-out.

So, when the user asks us to wait for interrupt on the workload's
completion, and the wait has timed-out, it doesn't mean the workload
has timed-out. It only means the wait has timed-out, which is NOT an
error from driver's perspective.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 2f40b937c59f..29e0549ff31d 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2932,11 +2932,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				rc = -EIO;
 				*status = HL_WAIT_CS_STATUS_ABORTED;
 			} else {
-				dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
-						interrupt->interrupt_id);
-				rc = -ETIMEDOUT;
+				/* The wait has timed-out. We don't know anything beyond that
+				 * because the workload wasn't submitted through the driver.
+				 * Therefore, from driver's perspective, the workload is still
+				 * executing.
+				 */
+				rc = 0;
+				*status = HL_WAIT_CS_STATUS_BUSY;
 			}
-			*status = HL_WAIT_CS_STATUS_BUSY;
 		}
 	}
 
@@ -3049,6 +3052,12 @@ wait_again:
 			interrupt->interrupt_id);
 		rc = -EINTR;
 	} else {
+		/* The wait has timed-out. We don't know anything beyond that
+		 * because the workload wasn't submitted through the driver.
+		 * Therefore, from driver's perspective, the workload is still
+		 * executing.
+		 */
+		rc = 0;
 		*status = HL_WAIT_CS_STATUS_BUSY;
 	}
 

From 1dc6cc4b38e5456b7b8741a6194018d30a521d38 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Tue, 4 Jan 2022 14:42:50 +0200
Subject: [PATCH 17/50] habanalabs: duplicate HOP table props to MMU props

In order to support several device MMU blocks with different
architectures (e.g. different HOP table size) we need to move to
per-MMU properties rather than keeping those properties as ASIC
properties.

Refactoring the code to use "per-MMU properties" is a major effort.

To start making the transition towards this goal but still support
taking the properties from ASIC properties (for code that currently
uses them) this patch copies some of the properties to the "per-MMU"
properties and later, when implementing the per-MMU properties, we
would be able to delete the MMU props from the ASIC props.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 +++
 drivers/misc/habanalabs/goya/goya.c   | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index c58e21e2db3b..f2242aa3baa2 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -613,6 +613,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 	prop->pmmu.page_size = PAGE_SIZE_4KB;
 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
 	prop->pmmu.last_mask = LAST_MASK;
+	/* TODO: will be duplicated until implementing per-MMU props */
+	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
+	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
 
 	/* PMMU and HPMMU are the same except of page size */
 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index d8c6dea7c809..3785fb33260d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -430,6 +430,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->dmmu.page_size = PAGE_SIZE_2MB;
 	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
 	prop->dmmu.last_mask = LAST_MASK;
+	/* TODO: will be duplicated until implementing per-MMU props */
+	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
+	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
 
 	/* shifts and masks are the same in PMMU and DMMU */
 	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -438,6 +441,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->pmmu.page_size = PAGE_SIZE_4KB;
 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
 	prop->pmmu.last_mask = LAST_MASK;
+	/* TODO: will be duplicated until implementing per-MMU props */
+	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
+	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
 
 	/* PMMU and HPMMU are the same except of page size */
 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));

From 7169f0dfecf7745c810782f3f5fe22ba2a8d9ba1 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 16 Jan 2022 00:18:32 +0200
Subject: [PATCH 18/50] habanalabs: don't free phys_pg_pack inside lock

Freeing phys_pg_pack includes calling to scrubbing functions of the
device's memory, taking locks and possibly even calling reset.

This is not something that should be done while holding a device-wide
spinlock.

Therefore, save the relevant objects on a local linked-list and after
releasing the spinlock, traverse that list and free the phys_pg_pack
objects.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  3 +++
 drivers/misc/habanalabs/common/memory.c     | 16 +++++++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6c7a60210416..9c8374d88907 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1738,6 +1738,8 @@ struct hl_vm_hw_block_list_node {
  * @pages: the physical page array.
  * @npages: num physical pages in the pack.
  * @total_size: total size of all the pages in this list.
+ * @node: used to attach to deletion list that is used when all the allocations are cleared
+ *        at the teardown of the context.
  * @mapping_cnt: number of shared mappings.
  * @exporting_cnt: number of dma-buf exporting.
  * @asid: the context related to this list.
@@ -1753,6 +1755,7 @@ struct hl_vm_phys_pg_pack {
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
+	struct list_head	node;
 	atomic_t		mapping_cnt;
 	u32			exporting_cnt;
 	u32			asid;
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index c1eefaebacb6..4a5d3a179765 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2607,11 +2607,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
  */
 void hl_vm_ctx_fini(struct hl_ctx *ctx)
 {
-	struct hl_device *hdev = ctx->hdev;
-	struct hl_vm *vm = &hdev->vm;
-	struct hl_vm_phys_pg_pack *phys_pg_list;
+	struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
+	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm_hash_node *hnode;
+	struct hl_vm *vm = &hdev->vm;
 	struct hlist_node *tmp_node;
+	struct list_head free_list;
 	struct hl_mem_in args;
 	int i;
 
@@ -2644,19 +2645,27 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 
 	mutex_unlock(&ctx->mmu_lock);
 
+	INIT_LIST_HEAD(&free_list);
+
 	spin_lock(&vm->idr_lock);
 	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
 		if (phys_pg_list->asid == ctx->asid) {
 			dev_dbg(hdev->dev,
 				"page list 0x%px of asid %d is still alive\n",
 				phys_pg_list, ctx->asid);
-			atomic64_sub(phys_pg_list->total_size,
-					&hdev->dram_used_mem);
-			free_phys_pg_pack(hdev, phys_pg_list);
+
+			atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
 			idr_remove(&vm->phys_pg_pack_handles, i);
+			list_add(&phys_pg_list->node, &free_list);
 		}
 	spin_unlock(&vm->idr_lock);
 
+	/* free_phys_pg_pack() releases the object that embeds the list node, so
+	 * the next entry must be fetched before the current one is freed.
+	 */
+	list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
+		free_phys_pg_pack(hdev, phys_pg_list);
+
 	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
 	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
 

From ca4c8e4e7b57eec403e9887cd8b47d2ff8c97811 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 12 Jan 2022 11:00:52 +0200
Subject: [PATCH 19/50] habanalabs: avoid copying pll data if pll_info_get
 fails

If reading PLL info from F/W fails, the PLL info is not set in the
"result" variable, and hence shouldn't be copied to the caller's array.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 93e9045461ae..7f76f98154f0 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -958,15 +958,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
-	if (rc)
+	if (rc) {
 		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
+		return rc;
+	}
 
 	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
 	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
 	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
 	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
 
-	return rc;
+	return 0;
 }
 
 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)

From aa3e1f12a2c1d6e36ce18c153706c906306b2e2d Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 12 Jan 2022 11:20:41 +0200
Subject: [PATCH 20/50] habanalabs: add missing error check in sysfs
 clk_freq_mhz_show

Add a missing error check in the sysfs show functions for
clk_max_freq_mhz and clk_cur_freq_mhz.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/sysfs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 3f220dd3b6b2..c665a3ac8876 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -18,6 +18,8 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute
 		return -ENODEV;
 
 	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+	if (value < 0)
+		return value;
 
 	hdev->asic_prop.max_freq_value = value;
 
@@ -59,6 +61,8 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute
 		return -ENODEV;
 
 	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+	if (value < 0)
+		return value;
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }

From 15f8eb190573aa7e38c1024fb6d20abf626298df Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Tue, 11 Jan 2022 11:10:20 +0200
Subject: [PATCH 21/50] habanalabs: fix soft reset flow in case of failure

In case of soft reset failure, hard reset should be initiated, but
reset flags were not set to enable it, which caused another soft reset
followed by another failure.
Updated reset flags to enable hard reset flow in case of soft reset
failure.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index c98a0a81e7d5..727315b36b8b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1293,11 +1293,14 @@ out_err:
 		hdev->reset_info.hard_reset_cnt++;
 	} else if (reset_upon_device_release) {
 		dev_err(hdev->dev, "Failed to reset device after user release\n");
+		flags |= HL_DRV_RESET_HARD;
+		flags &= ~HL_DRV_RESET_DEV_RELEASE;
 		hard_reset = true;
 		goto again;
 	} else {
 		dev_err(hdev->dev, "Failed to do soft-reset\n");
 		hdev->reset_info.soft_reset_cnt++;
+		flags |= HL_DRV_RESET_HARD;
 		hard_reset = true;
 		goto again;
 	}

From 4ae9548de7f84eb8f166f957cbcb9cc30f51bbd4 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 12 Jan 2022 11:40:43 +0200
Subject: [PATCH 22/50] habanalabs: add missing error check in sysfs
 max_power_show

Add a missing error check in the sysfs show function for max_power.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 4 ++--
 drivers/misc/habanalabs/common/habanalabs.h  | 2 +-
 drivers/misc/habanalabs/common/sysfs.c       | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 7f76f98154f0..39de9d86ee6c 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2780,7 +2780,7 @@ void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 			used_pll_idx, rc);
 }
 
-u64 hl_fw_get_max_power(struct hl_device *hdev)
+long hl_fw_get_max_power(struct hl_device *hdev)
 {
 	struct cpucp_packet pkt;
 	u64 result;
@@ -2794,7 +2794,7 @@ u64 hl_fw_get_max_power(struct hl_device *hdev)
 
 	if (rc) {
 		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
-		return (u64) rc;
+		return rc;
 	}
 
 	return result;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9c8374d88907..995167020c27 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3098,7 +3098,7 @@ int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *val
 int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
 int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
-u64 hl_fw_get_max_power(struct hl_device *hdev);
+long hl_fw_get_max_power(struct hl_device *hdev);
 void hl_fw_set_max_power(struct hl_device *hdev);
 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index c665a3ac8876..9ebeb18ab85e 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -304,6 +304,8 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
 		return -ENODEV;
 
 	val = hl_fw_get_max_power(hdev);
+	if (val < 0)
+		return val;
 
 	return sprintf(buf, "%lu\n", val);
 }

From 008255ec3d799e82a5aa3e913800d196bcc5393a Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 17 Jan 2022 19:05:56 +0200
Subject: [PATCH 23/50] habanalabs: update to latest f/w specs

Copy the latest versions of the f/w specs files.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 135e21d6edc9..15f91ae9de6e 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -33,6 +33,7 @@ enum cpu_boot_err {
 	CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
 	CPU_BOOT_ERR_BINNING_FAIL = 19,
 	CPU_BOOT_ERR_TPM_FAIL = 20,
+	CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
 	CPU_BOOT_ERR_ENABLED = 31,
 	CPU_BOOT_ERR_SCND_EN = 63,
 	CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -111,6 +112,9 @@ enum cpu_boot_err {
  *
  * CPU_BOOT_ERR0_TPM_FAIL		TPM verification flow failed.
  *
+ * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	Failed to set threshold for tmperature
+ *					sensor.
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -134,6 +138,7 @@ enum cpu_boot_err {
 #define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR		(1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
 #define CPU_BOOT_ERR0_BINNING_FAIL		(1 << CPU_BOOT_ERR_BINNING_FAIL)
 #define CPU_BOOT_ERR0_TPM_FAIL			(1 << CPU_BOOT_ERR_TPM_FAIL)
+#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	(1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
 #define CPU_BOOT_ERR0_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
 #define CPU_BOOT_ERR1_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
 

From aa3766def7506e5d9bd6c8387dcfe3629eb2a1f2 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 18 Jan 2022 14:58:30 +0200
Subject: [PATCH 24/50] habanalabs: expose number of user interrupts

Currently we only expose to the user the ID of the first available
user interrupt. To make user interrupts allocation truly dynamic, we
need to also expose the number of user interrupts.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 4 ++--
 include/uapi/misc/habanalabs.h                    | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index d19097576b05..c13a3c2a7013 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -92,8 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 	hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
 	hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
 
-	hw_ip.first_available_interrupt_id =
-			prop->first_available_user_msix_interrupt;
+	hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt;
+	hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
 	hw_ip.server_type = prop->server_type;
 
 	return copy_to_user(out, &hw_ip,
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 371dfc4243b3..12976f7a8d84 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -404,6 +404,8 @@ enum hl_server_type {
  * @cpucp_version: The CPUCP f/w version.
  * @card_name: The card name as passed by the f/w.
  * @dram_page_size: The DRAM physical page size.
+ * @number_of_user_interrupts: The number of interrupts that are available to the userspace
+ *                             application to use. Relevant for Gaudi2 and later.
  */
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
@@ -428,6 +430,9 @@ struct hl_info_hw_ip_info {
 	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
 	__u64 reserved2;
 	__u64 dram_page_size;
+	__u32 reserved3;
+	__u16 number_of_user_interrupts;
+	__u16 pad2;
 };
 
 struct hl_info_dram_usage {

From 9a79e3e4a3637c07352d9723b825490a1b04391f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 18 Jan 2022 12:16:53 +0200
Subject: [PATCH 25/50] habanalabs: reject host map with mmu disabled

This is not something we can work around. It is clearly an error
and we should notify the user that it is an error.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 30 +++++++++----------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 4a5d3a179765..f60a9638d9ea 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1967,16 +1967,15 @@ err_dec_exporting_cnt:
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
-	struct hl_ctx *ctx = hpriv->ctx;
 	u64 block_handle, device_addr = 0;
+	struct hl_ctx *ctx = hpriv->ctx;
 	u32 handle = 0, block_size;
-	int rc, dmabuf_fd = -EBADF;
+	int rc;
 
 	switch (args->in.op) {
 	case HL_MEM_OP_ALLOC:
 		if (args->in.alloc.mem_size == 0) {
-			dev_err(hdev->dev,
-				"alloc size must be larger than 0\n");
+			dev_err(hdev->dev, "alloc size must be larger than 0\n");
 			rc = -EINVAL;
 			goto out;
 		}
@@ -1997,15 +1996,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 
 	case HL_MEM_OP_MAP:
 		if (args->in.flags & HL_MEM_USERPTR) {
-			device_addr = args->in.map_host.host_virt_addr;
-			rc = 0;
+			dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
+			rc = -EPERM;
 		} else {
-			rc = get_paddr_from_handle(ctx, &args->in,
-							&device_addr);
+			rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
+			memset(args, 0, sizeof(*args));
+			args->out.device_virt_addr = device_addr;
 		}
 
-		memset(args, 0, sizeof(*args));
-		args->out.device_virt_addr = device_addr;
 		break;
 
 	case HL_MEM_OP_UNMAP:
@@ -2013,20 +2011,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		break;
 
 	case HL_MEM_OP_MAP_BLOCK:
-		rc = map_block(hdev, args->in.map_block.block_addr,
-				&block_handle, &block_size);
+		rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
 		args->out.block_handle = block_handle;
 		args->out.block_size = block_size;
 		break;
 
 	case HL_MEM_OP_EXPORT_DMABUF_FD:
-		rc = export_dmabuf_from_addr(ctx,
-				args->in.export_dmabuf_fd.handle,
-				args->in.export_dmabuf_fd.mem_size,
-				args->in.flags,
-				&dmabuf_fd);
-		memset(args, 0, sizeof(*args));
-		args->out.fd = dmabuf_fd;
+		dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
+		rc = -EPERM;
 		break;
 
 	default:

From 54faa5607b549ef7ef735e7d0c4523cc74bd20f4 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 19 Jan 2022 00:09:29 +0200
Subject: [PATCH 26/50] habanalabs: fix user interrupt wait when timeout is 0

When timeout is 0, we need to return the busy status in case the
target value wasn't reached upon entry to the ioctl.

Also return the correct timestamp.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 29e0549ff31d..8dd2f399d1c4 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2858,8 +2858,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
 				u64 cq_counters_handle,	u64 cq_counters_offset,
 				u64 target_value, struct hl_user_interrupt *interrupt,
-				u32 *status,
-				u64 *timestamp)
+				u32 *status, u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
 	unsigned long timeout, flags;
@@ -2900,11 +2899,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		*status = HL_WAIT_CS_STATUS_COMPLETED;
 		/* There was no interrupt, we assume the completion is now. */
 		pend->fence.timestamp = ktime_get();
-	}
-
-	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
 		goto set_timestamp;
 
+	} else if (!timeout_us) {
+		*status = HL_WAIT_CS_STATUS_BUSY;
+		pend->fence.timestamp = ktime_get();
+		goto set_timestamp;
+	}
+
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */

From 7a78d4d4819ec75c749d591c432d50cf7003448b Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 19 Jan 2022 00:10:43 +0200
Subject: [PATCH 27/50] habanalabs: fix race between wait and irq

There is a race in the user interrupts code, where between checking
the target value and adding the new pend to the list, there is a chance
the interrupt happened.

In that case, no one will complete the node, and we will get a timeout
on it.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 8dd2f399d1c4..307a95a039e0 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2892,16 +2892,21 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
 	pend->cq_target_value = target_value;
 
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
 	 */
 	if (*pend->cq_kernel_addr >= target_value) {
+		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
 		*status = HL_WAIT_CS_STATUS_COMPLETED;
 		/* There was no interrupt, we assume the completion is now. */
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
 
 	} else if (!timeout_us) {
+		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
@@ -2910,7 +2915,6 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 

From 930feb41efe2e799992ae07c1a274f68be7980ea Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 18 Jan 2022 12:31:15 +0200
Subject: [PATCH 28/50] habanalabs: prevent false heartbeat failure during
 soft-reset

The heartbeat thread is active during soft-reset, and it tries to send
messages to CPU-CP core.
Within the soft-reset, in the time window in which the device is marked
as disabled, any CPU-CP command is "silently" skipped and a success
value is returned.
However, in addition to the return value, the heartbeat function also
checks the F/W result, but because no command is sent in this time
window, the result variable won't hold the expected value and we will
have a false heartbeat failure.

To avoid it, modify the "silent" skip to be done only in hard-reset.
The CPU-CP should be able to handle messages during soft-reset.

In addition to the heartbeat problem, this should also solve other
issues in other flows that send messages during soft-reset and use the
F/W result as is, without being aware of the reset.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 39de9d86ee6c..11957d36c6a9 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -214,7 +214,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	dma_addr_t pkt_dma_addr;
 	struct hl_bd *sent_bd;
 	u32 tmp, expected_ack_val, pi;
-	int rc = 0;
+	int rc;
 
 	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
 								&pkt_dma_addr);
@@ -228,8 +228,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 
 	mutex_lock(&hdev->send_cpu_message_lock);
 
-	if (hdev->disabled)
+	/* CPU-CP messages can be sent during soft-reset */
+	if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
+		rc = 0;
 		goto out;
+	}
 
 	if (hdev->device_cpu_disabled) {
 		rc = -EIO;

From d2cfd6897ce06aed87d989bf2a0b2d6bbba579db Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 20 Jan 2022 13:11:55 +0200
Subject: [PATCH 29/50] habanalabs: remove duplicate print

We print detailed messages inside the internal ioctl functions. No need
to print a generic message at the end, it doesn't add any information.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 307a95a039e0..0ea9a73e4aa5 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -3126,13 +3126,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 				args->in.interrupt_timeout_us, args->in.addr,
 				args->in.target, interrupt, &status,
 				&timestamp);
-	if (rc) {
-		if (rc != -EINTR)
-			dev_err_ratelimited(hdev->dev,
-				"interrupt_wait_ioctl failed (%d)\n", rc);
-
+	if (rc)
 		return rc;
-	}
 
 	memset(args, 0, sizeof(*args));
 	args->out.status = status;

From a8076c47f6f78e5709d3a6b89c146c2706637f02 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 Jan 2022 10:16:51 +0300
Subject: [PATCH 30/50] habanalabs: silence an uninitialized variable warning

Smatch warns that:

    drivers/misc/habanalabs/common/command_buffer.c:471 hl_cb_ioctl()
    error: uninitialized symbol 'device_va'.

Which is true, but harmless.  Anyway, it's easy to silence this by
adding an error check.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_buffer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 3c0ae07a2d80..b9483a3cee91 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 				args->in.flags,
 				&usage_cnt,
 				&device_va);
+		if (rc)
+			break;
 
 		memset(&args->out, 0, sizeof(args->out));
 

From b32cd10480ebcfd66c3f7ae9fece951e5b5b35ae Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 27 Dec 2021 21:18:03 +0200
Subject: [PATCH 31/50] habanalabs: fix race when waiting on encaps signal

Scenario:
1. A CS which is part of an encaps signal has completed and is now
executing kref_put on its encaps signal handle. The refcount of the
handle is decremented to 0, and the encaps signal handle release
function, hl_encaps_handle_do_release, is called.

2. At this point the user starts waiting on the signal, finds the
encaps signal handle in the handlers list and increments the handle
refcount to 1.

3. Immediately after, hl_encaps_handle_do_release removes the handle
from the list and frees its memory.

4. The wait function uses the handle although it has been freed.

This scenario caused the slab area which was previously allocated
for the handle to have its poison overwritten, which triggered a kernel
bug the next time the OS needed to allocate this slab.

Fixed by getting the refcount of the handle only in case it is not
zero.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 0ea9a73e4aa5..ba5215b77852 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2063,13 +2063,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 			idp = &ctx->sig_mgr.handles;
 			idr_for_each_entry(idp, encaps_sig_hdl, id) {
 				if (encaps_sig_hdl->cs_seq == signal_seq) {
-					handle_found = true;
-					/* get refcount to protect removing
-					 * this handle from idr, needed when
-					 * multiple wait cs are used with offset
+					/* get refcount to protect removing this handle from idr,
+					 * needed when multiple wait cs are used with offset
 					 * to wait on reserved encaps signals.
+					 * Since kref_put of this handle is executed outside the
+					 * current lock, it is possible that the handle refcount
+					 * is 0 but it yet to be removed from the list. In this
+					 * case need to consider the handle as not valid.
 					 */
-					kref_get(&encaps_sig_hdl->refcount);
+					if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
+						handle_found = true;
 					break;
 				}
 			}

From 9158bf69e74f98fea6847cca93bbf33a589bebcd Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Thu, 23 Dec 2021 13:24:34 +0200
Subject: [PATCH 32/50] habanalabs: Timestamps buffers registration

The timestamp registration API allows the user to register
a timestamp record event, which makes the driver take a
timestamp when the CQ counter reaches the target value
and write it to a specific location specified
by the user.
This is a non-blocking API, unlike wait_for_interrupt,
which is a blocking one.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 199 ++++++++++---
 drivers/misc/habanalabs/common/device.c       |  18 +-
 drivers/misc/habanalabs/common/habanalabs.h   | 106 ++++++-
 .../misc/habanalabs/common/habanalabs_drv.c   |   2 +
 drivers/misc/habanalabs/common/irq.c          | 127 ++++++++-
 drivers/misc/habanalabs/common/memory.c       | 261 ++++++++++++++++++
 include/uapi/misc/habanalabs.h                |  37 ++-
 7 files changed, 691 insertions(+), 59 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ba5215b77852..c7757c78d0b1 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -14,6 +14,8 @@
 #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
 				HL_CS_FLAGS_COLLECTIVE_WAIT)
 
+#define MAX_TS_ITER_NUM 10
+
 /**
  * enum hl_cs_wait_status - cs wait status
  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
@@ -924,7 +926,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 	int i;
 	struct hl_cs *cs, *tmp;
 
-	flush_workqueue(hdev->sob_reset_wq);
+	flush_workqueue(hdev->ts_free_obj_wq);
 
 	/* flush all completions before iterating over the CS mirror list in
 	 * order to avoid a race with the release functions
@@ -948,13 +950,19 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
-	struct hl_user_pending_interrupt *pend;
+	struct hl_user_pending_interrupt *pend, *temp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-	list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
-		pend->fence.error = -EIO;
-		complete_all(&pend->fence.completion);
+	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
+		if (pend->ts_reg_info.ts_buff) {
+			list_del(&pend->wait_list_node);
+			hl_ts_put(pend->ts_reg_info.ts_buff);
+			hl_cb_put(pend->ts_reg_info.cq_cb);
+		} else {
+			pend->fence.error = -EIO;
+			complete_all(&pend->fence.completion);
+		}
 	}
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 }
@@ -2857,57 +2865,153 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
+static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
+					struct hl_cb *cq_cb,
+					u64 ts_offset, u64 cq_offset, u64 target_value,
+					spinlock_t *wait_list_lock,
+					struct hl_user_pending_interrupt **pend)
+{
+	struct hl_user_pending_interrupt *requested_offset_record =
+				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+				ts_offset;
+	struct hl_user_pending_interrupt *cb_last =
+			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
+	unsigned long flags, iter_counter = 0;
+	u64 current_cq_counter;
+
+	/* Validate ts_offset not exceeding last max */
+	if (requested_offset_record > cb_last) {
+		dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
+								(u64)(uintptr_t)cb_last);
+		return -EINVAL;
+	}
+
+start_over:
+	spin_lock_irqsave(wait_list_lock, flags);
+
+	/* Unregister only if we didn't reach the target value
+	 * since in this case there will be no handling in irq context
+	 * and then it's safe to delete the node out of the interrupt list
+	 * then re-use it on other interrupt
+	 */
+	if (requested_offset_record->ts_reg_info.in_use) {
+		current_cq_counter = *requested_offset_record->cq_kernel_addr;
+		if (current_cq_counter < requested_offset_record->cq_target_value) {
+			list_del(&requested_offset_record->wait_list_node);
+			spin_unlock_irqrestore(wait_list_lock, flags);
+
+			hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
+			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
+
+			dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
+		} else {
+			dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
+
+			/* irq handling in the middle give it time to finish */
+			spin_unlock_irqrestore(wait_list_lock, flags);
+			usleep_range(1, 10);
+			if (++iter_counter == MAX_TS_ITER_NUM) {
+				dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
+				return -EINVAL;
+			}
+
+			goto start_over;
+		}
+	} else {
+		spin_unlock_irqrestore(wait_list_lock, flags);
+	}
+
+	/* Fill up the new registration node info */
+	requested_offset_record->ts_reg_info.in_use = 1;
+	requested_offset_record->ts_reg_info.ts_buff = ts_buff;
+	requested_offset_record->ts_reg_info.cq_cb = cq_cb;
+	requested_offset_record->ts_reg_info.timestamp_kernel_addr =
+			(u64 *) ts_buff->user_buff_address + ts_offset;
+	requested_offset_record->cq_kernel_addr =
+			(u64 *) cq_cb->kernel_address + cq_offset;
+	requested_offset_record->cq_target_value = target_value;
+
+	*pend = requested_offset_record;
+
+	dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
+						(u64)(uintptr_t)requested_offset_record);
+	return 0;
+}
+
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
-				u64 cq_counters_handle,	u64 cq_counters_offset,
+				struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
+				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
 				u64 target_value, struct hl_user_interrupt *interrupt,
+				bool register_ts_record, u64 ts_handle, u64 ts_offset,
 				u32 *status, u64 *timestamp)
 {
+	u32 cq_patched_handle, ts_patched_handle;
 	struct hl_user_pending_interrupt *pend;
+	struct hl_ts_buff *ts_buff;
+	struct hl_cb *cq_cb;
 	unsigned long timeout, flags;
 	long completion_rc;
-	struct hl_cb *cb;
 	int rc = 0;
-	u32 handle;
 
 	timeout = hl_usecs64_to_jiffies(timeout_us);
 
 	hl_ctx_get(hdev, ctx);
 
-	cq_counters_handle >>= PAGE_SHIFT;
-	handle = (u32) cq_counters_handle;
-
-	cb = hl_cb_get(hdev, cb_mgr, handle);
-	if (!cb) {
-		hl_ctx_put(ctx);
-		return -EINVAL;
+	cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
+	cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
+	if (!cq_cb) {
+		rc = -EINVAL;
+		goto put_ctx;
 	}
 
-	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-	if (!pend) {
-		hl_cb_put(cb);
-		hl_ctx_put(ctx);
-		return -ENOMEM;
+	if (register_ts_record) {
+		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
+					interrupt->interrupt_id, ts_offset, cq_counters_offset);
+
+		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
+		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
+		if (!ts_buff) {
+			rc = -EINVAL;
+			goto put_cq_cb;
+		}
+
+		/* Find first available record */
+		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
+						cq_counters_offset, target_value,
+						&interrupt->wait_list_lock, &pend);
+		if (rc)
+			goto put_ts_buff;
+	} else {
+		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+		if (!pend) {
+			rc = -ENOMEM;
+			goto put_cq_cb;
+		}
+		hl_fence_init(&pend->fence, ULONG_MAX);
+		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
+		pend->cq_target_value = target_value;
 	}
 
-	hl_fence_init(&pend->fence, ULONG_MAX);
-
-	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
-	pend->cq_target_value = target_value;
-
 	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
 	 */
 	if (*pend->cq_kernel_addr >= target_value) {
+		if (register_ts_record)
+			pend->ts_reg_info.in_use = 0;
 		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 		*status = HL_WAIT_CS_STATUS_COMPLETED;
-		/* There was no interrupt, we assume the completion is now. */
-		pend->fence.timestamp = ktime_get();
-		goto set_timestamp;
 
+		if (register_ts_record) {
+			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+			goto put_ts_buff;
+		} else {
+			pend->fence.timestamp = ktime_get();
+			goto set_timestamp;
+		}
 	} else if (!timeout_us) {
 		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 		*status = HL_WAIT_CS_STATUS_BUSY;
@@ -2916,11 +3020,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	}
 
 	/* Add pending user interrupt to relevant list for the interrupt
-	 * handler to monitor
+	 * handler to monitor.
+	 * Note that we cannot have sorted list by target value,
+	 * in order to shorten the list pass loop, since
+	 * same list could have nodes for different cq counter handle.
 	 */
 	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
+	if (register_ts_record) {
+		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+		goto ts_registration_exit;
+	}
+
 	/* Wait for interrupt handler to signal completion */
 	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
 								timeout);
@@ -2952,15 +3064,30 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		}
 	}
 
+	/*
+	 * We keep removing the node from list here, and not at the irq handler
+	 * for completion timeout case. and if it's a registration
+	 * for ts record, the node will be deleted in the irq handler after
+	 * we reach the target value.
+	 */
 	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_del(&pend->wait_list_node);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
-
 	kfree(pend);
-	hl_cb_put(cb);
+	hl_cb_put(cq_cb);
+ts_registration_exit:
+	hl_ctx_put(ctx);
+
+	return rc;
+
+put_ts_buff:
+	hl_ts_put(ts_buff);
+put_cq_cb:
+	hl_cb_put(cq_cb);
+put_ctx:
 	hl_ctx_put(ctx);
 
 	return rc;
@@ -3119,11 +3246,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
 
 	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
+		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
 				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
 				args->in.cq_counters_offset,
-				args->in.target, interrupt, &status,
-				&timestamp);
+				args->in.target, interrupt,
+				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
+				args->in.timestamp_handle, args->in.timestamp_offset,
+				&status, &timestamp);
 	else
 		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
 				args->in.interrupt_timeout_us, args->in.addr,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 727315b36b8b..60bfd737404b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -145,6 +145,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	hl_release_pending_user_interrupts(hpriv->hdev);
 
 	hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
 	if (!hl_hpriv_put(hpriv))
@@ -209,6 +210,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	case HL_MMAP_TYPE_BLOCK:
 		return hl_hw_block_mmap(hpriv, vma);
+
+	case HL_MMAP_TYPE_TS_BUFF:
+		return hl_ts_mmap(hpriv, vma);
 	}
 
 	return -EINVAL;
@@ -410,10 +414,10 @@ static int device_early_init(struct hl_device *hdev)
 		goto free_cq_wq;
 	}
 
-	hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
-	if (!hdev->sob_reset_wq) {
+	hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
+	if (!hdev->ts_free_obj_wq) {
 		dev_err(hdev->dev,
-			"Failed to allocate SOB reset workqueue\n");
+			"Failed to allocate Timestamp registration free workqueue\n");
 		rc = -ENOMEM;
 		goto free_eq_wq;
 	}
@@ -422,7 +426,7 @@ static int device_early_init(struct hl_device *hdev)
 					GFP_KERNEL);
 	if (!hdev->hl_chip_info) {
 		rc = -ENOMEM;
-		goto free_sob_reset_wq;
+		goto free_ts_free_wq;
 	}
 
 	rc = hl_mmu_if_set_funcs(hdev);
@@ -461,8 +465,8 @@ free_cb_mgr:
 	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 free_chip_info:
 	kfree(hdev->hl_chip_info);
-free_sob_reset_wq:
-	destroy_workqueue(hdev->sob_reset_wq);
+free_ts_free_wq:
+	destroy_workqueue(hdev->ts_free_obj_wq);
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -501,7 +505,7 @@ static void device_early_fini(struct hl_device *hdev)
 
 	kfree(hdev->hl_chip_info);
 
-	destroy_workqueue(hdev->sob_reset_wq);
+	destroy_workqueue(hdev->ts_free_obj_wq);
 	destroy_workqueue(hdev->eq_wq);
 	destroy_workqueue(hdev->device_reset_work.wq);
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 995167020c27..b06e2b0812b6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -31,14 +31,15 @@
 #define HL_NAME				"habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:61] - Encode mmap type
+ * bits[63:59] - Encode mmap type
  * bits[45:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT		(61 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK		(0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT		(59 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK		(0x1full << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_TS_BUFF		(0x10ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_BLOCK		(0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB			(0x2ull << HL_MMAP_TYPE_SHIFT)
 
@@ -709,6 +710,40 @@ struct hl_cb_mgr {
 	struct idr		cb_handles; /* protected by cb_lock */
 };
 
+/**
+ * struct hl_ts_mgr - describes the timestamp registration memory manager.
+ * @ts_lock: protects ts_handles.
+ * @ts_handles: an idr to hold all ts buffer handles.
+ */
+struct hl_ts_mgr {
+	spinlock_t		ts_lock;
+	struct idr		ts_handles;
+};
+
+/**
+ * struct hl_ts_buff - describes a timestamp buffer.
+ * @refcount: reference counter for usage of the buffer.
+ * @hdev: pointer to device this buffer belongs to.
+ * @mmap: true if the buff is currently mapped to user.
+ * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
+ * @user_buff_address: Holds the user buffer's kernel virtual address.
+ * @id: the buffer ID.
+ * @mmap_size: Holds the buffer size that was mmaped.
+ * @kernel_buff_size: Holds the internal kernel buffer size.
+ * @user_buff_size: Holds the user buffer size.
+ */
+struct hl_ts_buff {
+	struct kref		refcount;
+	struct hl_device	*hdev;
+	atomic_t		mmap;
+	void			*kernel_buff_address;
+	void			*user_buff_address;
+	u32			id;
+	u32			mmap_size;
+	u32			kernel_buff_size;
+	u32			user_buff_size;
+};
+
 /**
  * struct hl_cb - describes a Command Buffer.
  * @refcount: reference counter for usage of the CB.
@@ -886,9 +921,54 @@ struct hl_user_interrupt {
 	u32			interrupt_id;
 };
 
+/**
+ * struct timestamp_reg_free_node - holds the timestamp registration free objects node
+ * @free_objects_node: node in the list free_obj_jobs
+ * @cq_cb: pointer to cq command buffer to be freed
+ * @ts_buff: pointer to timestamp buffer to be freed
+ */
+struct timestamp_reg_free_node {
+	struct list_head	free_objects_node;
+	struct hl_cb		*cq_cb;
+	struct hl_ts_buff	*ts_buff;
+};
+
+/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
+ * the job will be to pass over the free_obj_jobs list and put refcount to objects
+ * in each node of the list
+ * @free_obj: workqueue object to free timestamp registration node objects
+ * @hdev: pointer to the device structure
+ * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
+ */
+struct timestamp_reg_work_obj {
+	struct work_struct	free_obj;
+	struct hl_device	*hdev;
+	struct list_head	*free_obj_head;
+};
+
+/* struct timestamp_reg_info - holds the timestamp registration related data.
+ * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
+ *           relevant only when doing timestamps records registration.
+ * @cq_cb: pointer to CQ counter CB.
+ * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
+ *                         relevant only when doing timestamps records
+ *                         registration.
+ * @in_use: indicates if the node is already in use. Relevant only when doing
+ *          timestamps records registration, since in this case the driver
+ *          will have its own buffer which serves as a records pool instead of
+ *          allocating records dynamically.
+ */
+struct timestamp_reg_info {
+	struct hl_ts_buff	*ts_buff;
+	struct hl_cb		*cq_cb;
+	u64			*timestamp_kernel_addr;
+	u8			in_use;
+};
+
 /**
  * struct hl_user_pending_interrupt - holds a context to a user thread
  *                                    pending on an interrupt
+ * @ts_reg_info: holds the timestamps registration nodes info
  * @wait_list_node: node in the list of user threads pending on an interrupt
  * @fence: hl fence object for interrupt completion
  * @cq_target_value: CQ target value
@@ -896,10 +976,11 @@ struct hl_user_interrupt {
  *                  handler for taget value comparison
  */
 struct hl_user_pending_interrupt {
-	struct list_head	wait_list_node;
-	struct hl_fence		fence;
-	u64			cq_target_value;
-	u64			*cq_kernel_addr;
+	struct timestamp_reg_info	ts_reg_info;
+	struct list_head		wait_list_node;
+	struct hl_fence			fence;
+	u64				cq_target_value;
+	u64				*cq_kernel_addr;
 };
 
 /**
@@ -1833,6 +1914,7 @@ struct hl_debug_params {
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.
  * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
  * @debugfs_list: list of relevant ASIC debugfs.
  * @dev_node: node in the device list of file private data
  * @refcount: number of related contexts.
@@ -1845,6 +1927,7 @@ struct hl_fpriv {
 	struct hl_ctx		*ctx;
 	struct hl_ctx_mgr	ctx_mgr;
 	struct hl_cb_mgr	cb_mgr;
+	struct hl_ts_mgr	ts_mem_mgr;
 	struct list_head	debugfs_list;
 	struct list_head	dev_node;
 	struct kref		refcount;
@@ -2517,7 +2600,7 @@ struct hl_reset_info {
  * @cq_wq: work queues of completion queues for executing work in process
  *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
- * @sob_reset_wq: work queue for sob reset executions.
+ * @ts_free_obj_wq: work queue for timestamp registration objects release.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
@@ -2645,7 +2728,7 @@ struct hl_device {
 	struct hl_user_interrupt	common_user_interrupt;
 	struct workqueue_struct		**cq_wq;
 	struct workqueue_struct		*eq_wq;
-	struct workqueue_struct		*sob_reset_wq;
+	struct workqueue_struct		*ts_free_obj_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
 	struct list_head		cs_mirror_list;
@@ -3128,6 +3211,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
 					const char *format, ...);
 char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
 const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
+void hl_ts_put(struct hl_ts_buff *buff);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 017bfc4551da..ca404ed9d9a7 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 	hl_cb_mgr_init(&hpriv->cb_mgr);
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
+	hl_ts_mgr_init(&hpriv->ts_mem_mgr);
 
 	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
 
@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
 	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 	filp->private_data = NULL;
 	mutex_destroy(&hpriv->restore_phase_mutex);
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 1b6bdc900c26..c28f3a1c7a03 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+/*
+ * hl_ts_free_objects - handler of the free objects workqueue.
+ * This function puts the refcount of the objects that the registration node
+ * took a reference to.
+ * @work: workqueue object pointer
+ */
+static void hl_ts_free_objects(struct work_struct *work)
+{
+	struct timestamp_reg_work_obj *job =
+			container_of(work, struct timestamp_reg_work_obj, free_obj);
+	struct timestamp_reg_free_node *free_obj, *temp_free_obj;
+	struct list_head *free_list_head = job->free_obj_head;
+	struct hl_device *hdev = job->hdev;
+
+	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
+		dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
+					free_obj->ts_buff,
+					free_obj->cq_cb);
+
+		hl_ts_put(free_obj->ts_buff);
+		hl_cb_put(free_obj->cq_cb);
+		kfree(free_obj);
+	}
+
+	kfree(free_list_head);
+	kfree(job);
+}
+
+/*
+ * This function is called with the wait_list_lock spinlock taken.
+ * It sets the timestamp and deletes the registration node from the
+ * interrupt wait list.
+ * Since we're under a spinlock here, we cannot just put the refcount
+ * of the objects, since the release function may be called and it is long
+ * logic (which might also sleep) that cannot be handled in irq context,
+ * so here we'll be filling a list with nodes of "put" jobs and then will send this
+ * list to a dedicated workqueue to do the actual put.
+ */
+int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
+						struct list_head **free_list)
+{
+	struct timestamp_reg_free_node *free_node;
+	u64 timestamp;
+
+	if (!(*free_list)) {
+		/* Alloc/Init the timestamp registration free objects list */
+		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+		if (!(*free_list))
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(*free_list);
+	}
+
+	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
+	if (!free_node)
+		return -ENOMEM;
+
+	timestamp = ktime_get_ns();
+
+	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
+
+	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
+			pend->ts_reg_info.timestamp_kernel_addr,
+			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
+
+	list_del(&pend->wait_list_node);
+
+	/* Mark kernel CB node as free */
+	pend->ts_reg_info.in_use = 0;
+
+	/* Putting the refcount for ts_buff and cq_cb objects will be handled
+	 * in workqueue context, just add job to free_list.
+	 */
+	free_node->ts_buff = pend->ts_reg_info.ts_buff;
+	free_node->cq_cb = pend->ts_reg_info.cq_cb;
+	list_add(&free_node->free_objects_node, *free_list);
+
+	return 0;
+}
+
 static void handle_user_cq(struct hl_device *hdev,
 			struct hl_user_interrupt *user_cq)
 {
-	struct hl_user_pending_interrupt *pend;
+	struct hl_user_pending_interrupt *pend, *temp_pend;
+	struct list_head *ts_reg_free_list_head = NULL;
+	struct timestamp_reg_work_obj *job;
+	bool reg_node_handle_fail = false;
 	ktime_t now = ktime_get();
+	int rc;
+
+	/* For registration nodes:
+	 * As part of handling the registration nodes, we should put refcount to
+	 * some objects. the problem is that we cannot do that under spinlock
+	 * or in irq handler context at all (since release functions are long and
+	 * might sleep), so we will need to handle that part in workqueue context.
+	 * To avoid handling a kmalloc failure, which would compel us to roll back
+	 * actions and move nodes hung on the free list back to the interrupt wait
+	 * list, we always allocate the job of the WQ at the beginning.
+	 */
+	job = kmalloc(sizeof(*job), GFP_ATOMIC);
+	if (!job)
+		return;
 
 	spin_lock(&user_cq->wait_list_lock);
-	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
-		if ((pend->cq_kernel_addr &&
-				*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
+	list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
+		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
 				!pend->cq_kernel_addr) {
-			pend->fence.timestamp = now;
-			complete_all(&pend->fence.completion);
+			if (pend->ts_reg_info.ts_buff) {
+				if (!reg_node_handle_fail) {
+					rc = handle_registration_node(hdev, pend,
+									&ts_reg_free_list_head);
+					if (rc)
+						reg_node_handle_fail = true;
+				}
+			} else {
+				/* Handle wait target value node */
+				pend->fence.timestamp = now;
+				complete_all(&pend->fence.completion);
+			}
 		}
 	}
 	spin_unlock(&user_cq->wait_list_lock);
+
+	if (ts_reg_free_list_head) {
+		INIT_WORK(&job->free_obj, hl_ts_free_objects);
+		job->free_obj_head = ts_reg_free_list_head;
+		job->hdev = hdev;
+		queue_work(hdev->ts_free_obj_wq, &job->free_obj);
+	} else {
+		kfree(job);
+	}
 }
 
 /**
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index f60a9638d9ea..c0904081f37e 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -20,6 +20,9 @@ MODULE_IMPORT_NS(DMA_BUF);
 /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
 #define DRAM_POOL_PAGE_SIZE SZ_8M
 
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
+			struct hl_mem_in *args, u64 *handle);
+
 /*
  * The va ranges in context object contain a list with the available chunks of
  * device virtual memory.
@@ -2021,6 +2024,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		rc = -EPERM;
 		break;
 
+	case HL_MEM_OP_TS_ALLOC:
+		rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+		break;
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -EINVAL;
@@ -2031,6 +2037,258 @@ out:
 	return rc;
 }
 
+static void ts_buff_release(struct kref *ref)
+{
+	struct hl_ts_buff *buff;
+
+	buff = container_of(ref, struct hl_ts_buff, refcount);
+
+	vfree(buff->kernel_buff_address);
+	vfree(buff->user_buff_address);
+	kfree(buff);
+}
+
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
+					u32 handle)
+{
+	struct hl_ts_buff *buff;
+
+	spin_lock(&mgr->ts_lock);
+	buff = idr_find(&mgr->ts_handles, handle);
+	if (!buff) {
+		spin_unlock(&mgr->ts_lock);
+		dev_warn(hdev->dev,
+			"TS buff get failed, no match to handle 0x%x\n", handle);
+		return NULL;
+	}
+	kref_get(&buff->refcount);
+	spin_unlock(&mgr->ts_lock);
+
+	return buff;
+}
+
+void hl_ts_put(struct hl_ts_buff *buff)
+{
+	kref_put(&buff->refcount, ts_buff_release);
+}
+
+static void buff_vm_close(struct vm_area_struct *vma)
+{
+	struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
+	long new_mmap_size;
+
+	new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
+
+	if (new_mmap_size > 0) {
+		buff->mmap_size = new_mmap_size;
+		return;
+	}
+
+	atomic_set(&buff->mmap, 0);
+	hl_ts_put(buff);
+	vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct ts_buff_vm_ops = {
+	.close = buff_vm_close
+};
+
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ts_buff *buff;
+	u32 handle, user_buff_size;
+	int rc;
+
+	/* We use the page offset to hold the idr and thus we need to clear
+	 * it before doing the mmap itself
+	 */
+	handle = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
+
+	buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
+	if (!buff) {
+		dev_err(hdev->dev,
+			"TS buff mmap failed, no match to handle 0x%x\n", handle);
+		return -EINVAL;
+	}
+
+	/* Validation check */
+	user_buff_size = vma->vm_end - vma->vm_start;
+	if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
+		dev_err(hdev->dev,
+			"TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
+			user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
+		rc = -EINVAL;
+		goto put_buff;
+	}
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+	if (!access_ok(VERIFY_WRITE,
+		(void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
+#else
+	if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
+						user_buff_size)) {
+#endif
+		dev_err(hdev->dev,
+			"user pointer is invalid - 0x%lx\n",
+			vma->vm_start);
+
+		rc = -EINVAL;
+		goto put_buff;
+	}
+
+	if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
+		dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
+		rc = -EINVAL;
+		goto put_buff;
+	}
+
+	vma->vm_ops = &ts_buff_vm_ops;
+	vma->vm_private_data = buff;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
+	rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
+	if (rc) {
+		atomic_set(&buff->mmap, 0);
+		goto put_buff;
+	}
+
+	buff->mmap_size = buff->user_buff_size;
+	vma->vm_pgoff = handle;
+
+	return 0;
+
+put_buff:
+	hl_ts_put(buff);
+	return rc;
+}
+
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
+{
+	spin_lock_init(&mgr->ts_lock);
+	idr_init(&mgr->ts_handles);
+}
+
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
+{
+	struct hl_ts_buff *buff;
+	struct idr *idp;
+	u32 id;
+
+	idp = &mgr->ts_handles;
+
+	idr_for_each_entry(idp, buff, id) {
+		if (kref_put(&buff->refcount, ts_buff_release) != 1)
+			dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
+							id);
+	}
+
+	idr_destroy(&mgr->ts_handles);
+}
+
+static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+{
+	struct hl_ts_buff *ts_buff = NULL;
+	u32 size;
+	void *p;
+
+	ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
+	if (!ts_buff)
+		return NULL;
+
+	/* Allocate the user buffer: one u64 timestamp slot per element */
+	size = num_elements * sizeof(u64);
+	p = vmalloc_user(size);
+	if (!p)
+		goto free_mem;
+
+	ts_buff->user_buff_address = p;
+	ts_buff->user_buff_size = size;
+
+	/* Allocate the internal kernel records pool; zeroed so all in_use flags start clear */
+	size = num_elements * sizeof(struct hl_user_pending_interrupt);
+	p = vzalloc(size);
+	if (!p)
+		goto free_user_buff;
+
+	ts_buff->kernel_buff_address = p;
+	ts_buff->kernel_buff_size = size;
+
+	return ts_buff;
+
+free_user_buff:
+	vfree(ts_buff->user_buff_address);
+free_mem:
+	kfree(ts_buff);
+	return NULL;
+}
+
+/**
+ * allocate_timestamps_buffers() - allocate timestamps buffers
+ * Allocates a ts buffer object holding a buffer that is later mapped to the
+ * user for reading timestamps, and an internal kernel buffer that serves as
+ * a pre-allocated pool of user interrupt nodes, so that registration never
+ * needs to allocate memory. The object is inserted to the ts manager idr and
+ * the idr id, combined with HL_MMAP_TYPE_TS_BUFF, is returned as the handle.
+ * The refcount is initialized before the object is published in the idr, so
+ * the error path can drop it with kref_put() and lookups never see it unset.
+ * @hpriv: pointer to the private data of the fd
+ * @args: ioctl input
+ * @handle: user timestamp buffer handle as an output; set to 0 if an allocation fails
+ * Return: 0 on success, negative errno on failure.
+ */
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
+{
+	struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ts_buff *ts_buff;
+	int rc = 0;
+
+	if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
+		dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+				args->num_of_elements, TS_MAX_ELEMENTS_NUM);
+		return -EINVAL;
+	}
+
+	/* Allocate ts buffer object
+	 * This object will contain two buffers one that will be mapped to the user
+	 * and another internal buffer for the driver use only, which won't be mapped
+	 * to the user.
+	 */
+	ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
+	if (!ts_buff) {
+		rc = -ENOMEM;
+		goto out_err;
+	}
+
+	/* Must precede idr_alloc(): release_ts_buff relies on a valid refcount */
+	ts_buff->hdev = hdev;
+	kref_init(&ts_buff->refcount);
+
+	spin_lock(&ts_mgr->ts_lock);
+	rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
+	spin_unlock(&ts_mgr->ts_lock);
+	if (rc < 0) {
+		dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
+		goto release_ts_buff;
+	}
+	ts_buff->id = rc;
+
+	/* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
+	*handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
+	*handle <<= PAGE_SHIFT;
+
+	dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+
+	return 0;
+
+release_ts_buff:
+	kref_put(&ts_buff->refcount, ts_buff_release);
+out_err:
+	*handle = 0;
+	return rc;
+}
+
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	enum hl_device_status status;
@@ -2146,6 +2404,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		args->out.fd = dmabuf_fd;
 		break;
 
+	case HL_MEM_OP_TS_ALLOC:
+		rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+		break;
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -EINVAL;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 12976f7a8d84..e21db03196ae 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2020 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -30,6 +30,9 @@
  */
 #define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		72
 
+/* Max number of elements in timestamps registration buffers */
+#define	TS_MAX_ELEMENTS_NUM				(1 << 20) /* 1M elements */
+
 /*
  * Goya queue Numbering
  *
@@ -695,10 +698,12 @@ struct hl_cb_in {
 	__u64 cb_handle;
 	/* HL_CB_OP_* */
 	__u32 op;
+
 	/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
 	 * will be allocated, regardless of this parameter's value, is PAGE_SIZE
 	 */
 	__u32 cb_size;
+
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
 	/* HL_CB_FLAGS_* */
@@ -964,6 +969,7 @@ union hl_cs_args {
 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK		0xFFF00000
 #define HL_WAIT_CS_FLAGS_MULTI_CS		0x4
 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ	0x10
+#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT	0x20
 
 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32
 
@@ -1036,6 +1042,20 @@ struct hl_wait_cs_in {
 	 * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
 	 */
 	__u64 cq_counters_offset;
+
+	/*
+	 * Timestamp_handle is the timestamps buffer handle.
+	 * Relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set.
+	 */
+	__u64 timestamp_handle;
+
+	/*
+	 * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above.
+	 * upon interrupt, if the cq reached the target value then driver will write
+	 * timestamp to this offset.
+	 * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
+	 */
+	__u64 timestamp_offset;
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0
@@ -1082,6 +1102,14 @@ union hl_wait_cs_args {
  */
 #define HL_MEM_OP_EXPORT_DMABUF_FD	5
 
+/* Opcode to create a timestamps pool for user interrupts registration support.
+ * The memory is allocated by the kernel driver: a timestamp buffer, for which the
+ * user gets a handle to be used with mmap, and another internal buffer used by
+ * the driver for registration management.
+ * The memory will be freed when the user closes the file descriptor (ctx close).
+ */
+#define HL_MEM_OP_TS_ALLOC		6
+
 /* Memory flags */
 #define HL_MEM_CONTIGUOUS	0x1
 #define HL_MEM_SHARED		0x2
@@ -1173,9 +1201,14 @@ struct hl_mem_in {
 	 * DMA-BUF file/FD flags.
 	 */
 	__u32 flags;
+
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
-	__u32 pad;
+
+	/* number of timestamp elements
+	 * used only when HL_MEM_OP_TS_ALLOC opcode
+	 */
+	__u32 num_of_elements;
 };
 
 struct hl_mem_out {

From 960be39db6fe421c49e603bbd541ff2e397f280e Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Fri, 28 Jan 2022 10:11:39 +0200
Subject: [PATCH 33/50] habanalabs: fix spelling mistake

The name of the property is hints_range_reservation

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index c0904081f37e..4778f23d8098 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -655,7 +655,7 @@ static u64 get_va_block(struct hl_device *hdev,
 			continue;
 
 		/*
-		 * In case hint address is 0, and arc_hints_range_reservation
+		 * In case hint address is 0, and hints_range_reservation
 		 * property enabled, then avoid allocating va blocks from the
 		 * range reserved for hint addresses
 		 */

From 2a835946ee49462fc18eb6db9b53be789acaf2e5 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 30 Jan 2022 10:08:52 +0200
Subject: [PATCH 34/50] habanalabs: rephrase error messages in PCI
 initialization

The iATU is an internal h/w machine inside Habana's PCI controller.
Mentioning it by name doesn't say anything to the user. It is better
to say the PCI controller initialization was not done successfully.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/pci/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index a6ffa342070c..bb9ce22bafc4 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -408,13 +408,13 @@ int hl_pci_init(struct hl_device *hdev)
 
 	rc = hdev->asic_funcs->pci_bars_map(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
+		dev_err(hdev->dev, "Failed to map PCI BAR addresses\n");
 		goto disable_device;
 	}
 
 	rc = hdev->asic_funcs->init_iatu(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to initialize iATU\n");
+		dev_err(hdev->dev, "PCI controller was not initialized successfully\n");
 		goto unmap_pci_bars;
 	}
 

From 57b6f02fff3e2a8c394bbde6724950d492160356 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 30 Jan 2022 17:39:54 +0200
Subject: [PATCH 35/50] habanalabs: fix use-after-free bug

When the code iterates over the free list of physical pages nodes, it
deletes the physical page node which is used as the iterator.

Therefore, we need to use the safe version of the iteration to prevent
use-after-free.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 4778f23d8098..348daac621ee 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2860,7 +2860,7 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
  */
 void hl_vm_ctx_fini(struct hl_ctx *ctx)
 {
-	struct hl_vm_phys_pg_pack *phys_pg_list;
+	struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm_hash_node *hnode;
 	struct hl_vm *vm = &hdev->vm;
@@ -2913,7 +2913,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 		}
 	spin_unlock(&vm->idr_lock);
 
-	list_for_each_entry(phys_pg_list, &free_list, node)
+	list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
 		free_phys_pg_pack(hdev, phys_pg_list);
 
 	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);

From 9e70ac1aa7ba50f223b4784c9e19d9ee3e47e634 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 2 Feb 2022 10:36:29 +0200
Subject: [PATCH 36/50] habanalabs: add missing include of vmalloc.h

Use of vfree(), vmalloc_user(), vmalloc() and remap_vmalloc_range()
requires this include in some architectures.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 348daac621ee..7bd4cae87a54 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -11,6 +11,7 @@
 
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/pci-p2pdma.h>
 
 MODULE_IMPORT_NS(DMA_BUF);

From 4a0b01fa63c9dc4f0dd3ad44168c9323e4da5172 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 2 Feb 2022 10:32:53 +0200
Subject: [PATCH 37/50] habanalabs: change function to static

handle_registration_node() is called directly from the irq handler
in irq.c, so it can be static.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index c28f3a1c7a03..e2bc128f2291 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -175,7 +175,7 @@ static void hl_ts_free_objects(struct work_struct *work)
  * so here we'll be filling a list with nodes of "put" jobs and then will send this
  * list to a dedicated workqueue to do the actual put.
  */
-int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
+static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
 						struct list_head **free_list)
 {
 	struct timestamp_reg_free_node *free_node;

From d01e6cc97b44d8501b5c24a36d60c9cab94640cd Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 12 Jan 2022 20:08:01 +0200
Subject: [PATCH 38/50] habanalabs: enable stop-on-error debugfs setting per
 ASIC

On Goya and Gaudi, the stop-on-error configuration can be set via
debugfs. However, in future devices, this configuration will always be
enabled.
Modify the debugfs node to be allowed only for ASICs that support this
dynamic configuration.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/debugfs-driver-habanalabs | 1 +
 drivers/misc/habanalabs/common/debugfs.c            | 6 ++++++
 drivers/misc/habanalabs/common/habanalabs.h         | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c               | 2 ++
 drivers/misc/habanalabs/goya/goya.c                 | 2 ++
 5 files changed, 13 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 783001a574b3..bcf6915987e4 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -222,6 +222,7 @@ KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
 Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
+                Relevant only for GOYA and GAUDI.
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
 Date:           Sep 2021
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index e3ee5f45d20c..9f0aaf0ef43b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1071,6 +1071,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
 	char tmp_buf[200];
 	ssize_t rc;
 
+	if (!hdev->asic_prop.configurable_stop_on_err)
+		return -EOPNOTSUPP;
+
 	if (*ppos)
 		return 0;
 
@@ -1089,6 +1092,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
 	u32 value;
 	ssize_t rc;
 
+	if (!hdev->asic_prop.configurable_stop_on_err)
+		return -EOPNOTSUPP;
+
 	if (hdev->reset_info.in_reset) {
 		dev_warn_ratelimited(hdev->dev,
 				"Can't change stop on error during reset\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index b06e2b0812b6..93116fe71ef6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -561,6 +561,7 @@ struct hl_hints_range {
  *                              use-case of doing soft-reset in training (due
  *                              to the fact that training runs on multiple
  *                              devices)
+ * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -644,6 +645,7 @@ struct asic_fixed_properties {
 	u8				use_get_power_for_reset_history;
 	u8				supports_soft_reset;
 	u8				allow_inference_soft_reset;
+	u8				configurable_stop_on_err;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f2242aa3baa2..61aa6dce6dde 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -669,6 +669,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 
 	prop->use_get_power_for_reset_history = true;
 
+	prop->configurable_stop_on_err = true;
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3785fb33260d..c8143b6616af 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -483,6 +483,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 
 	prop->use_get_power_for_reset_history = true;
 
+	prop->configurable_stop_on_err = true;
+
 	return 0;
 }
 

From 35629bc171b642d5f0bbb59345471d20ecff5ff7 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 18 Jan 2022 00:21:49 +0200
Subject: [PATCH 39/50] habanalabs: use proper max_power variable for device
 utilization

The max_power variable which is used for calculating the device
utilization is the ASIC specific property which is set during init.
However, the max value can be modified via sysfs, and thus the updated
value in the device structure should be used instead.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 60bfd737404b..63e2449ad136 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -614,7 +614,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
 	u64 max_power, curr_power, dc_power, dividend;
 	int rc;
 
-	max_power = hdev->asic_prop.max_power_default;
+	max_power = hdev->max_power;
 	dc_power = hdev->asic_prop.dc_power_default;
 	rc = hl_fw_cpucp_power_get(hdev, &curr_power);
 

From 2908826d09f829528cb507ce547967beccf4388d Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 18 Jan 2022 19:27:14 +0200
Subject: [PATCH 40/50] habanalabs: set max power on device init per ASIC

For current devices there is a need to send the max power value to F/W
during device init, for example because there might be several card
types.
In future devices, this info will be programmed in the device's EEPROM
and will be read by F/W, and hence the driver should not send it.

Modify the sending of the relevant message to be done only for ASIC
types that need it.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 3 ++-
 drivers/misc/habanalabs/common/habanalabs.h | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 2 ++
 drivers/misc/habanalabs/goya/goya.c         | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 63e2449ad136..8ea9dfe3f79b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1541,7 +1541,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	/* Need to call this again because the max power might change,
 	 * depending on card type for certain ASICs
 	 */
-	hl_fw_set_max_power(hdev);
+	if (hdev->asic_prop.set_max_power_on_device_init)
+		hl_fw_set_max_power(hdev);
 
 	/*
 	 * hl_hwmon_init() must be called after device_late_init(), because only
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 93116fe71ef6..b5055ab528b8 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -562,6 +562,7 @@ struct hl_hints_range {
  *                              to the fact that training runs on multiple
  *                              devices)
  * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
+ * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -646,6 +647,7 @@ struct asic_fixed_properties {
 	u8				supports_soft_reset;
 	u8				allow_inference_soft_reset;
 	u8				configurable_stop_on_err;
+	u8				set_max_power_on_device_init;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 61aa6dce6dde..ad640a268a3c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -671,6 +671,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 
 	prop->configurable_stop_on_err = true;
 
+	prop->set_max_power_on_device_init = true;
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c8143b6616af..c08d96e43c9f 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -485,6 +485,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 
 	prop->configurable_stop_on_err = true;
 
+	prop->set_max_power_on_device_init = true;
+
 	return 0;
 }
 

From 59456f4c2250e1fb15b1a82bfe2ef1aaf217fa1b Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 10 Feb 2022 10:56:02 +0200
Subject: [PATCH 41/50] habanalabs: avoid using an uninitialized variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compilation warning in
hl_cb_ioctl() @ command_buffer.c:
warning: ‘device_va’ may be used uninitialized in this function

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index b9483a3cee91..a507110f6443 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	union hl_cb_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
+	u64 handle = 0, device_va = 0;
 	enum hl_device_status status;
-	u64 handle = 0, device_va;
 	u32 usage_cnt = 0;
 	int rc;
 

From 9349a321d327d08a4e91f9b6aca9c1f3a3a25306 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Mon, 14 Feb 2022 15:20:08 +0200
Subject: [PATCH 42/50] habanalabs: use kernel-doc for memory ioctl
 documentation

Re-format the comments for the memory ioctl structure to be in
kernel-doc style.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 111 +++++++++++++++++----------------
 1 file changed, 57 insertions(+), 54 deletions(-)

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index e21db03196ae..a2a953a91b5e 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1116,98 +1116,101 @@ union hl_wait_cs_args {
 #define HL_MEM_USERPTR		0x4
 #define HL_MEM_FORCE_HINT	0x8
 
+/**
+ * structure hl_mem_in - structure that handle input args for memory IOCTL
+ * @union arg: union of structures to be used based on the input operation
+ * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions).
+ * @flags: flags for the memory operation (one of the HL_MEM_* definitions).
+ *         For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags.
+ * @ctx_id: context ID - currently not in use.
+ * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode.
+ */
 struct hl_mem_in {
 	union {
-		/* HL_MEM_OP_ALLOC- allocate device memory */
+		/**
+		 * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op)
+		 * @mem_size: memory size to allocate
+		 */
 		struct {
-			/* Size to alloc */
 			__u64 mem_size;
 		} alloc;
 
-		/* HL_MEM_OP_FREE - free device memory */
+		/**
+		 * structure for free-ing device memory (used with the HL_MEM_OP_FREE op)
+		 * @handle: handle returned from HL_MEM_OP_ALLOC
+		 */
 		struct {
-			/* Handle returned from HL_MEM_OP_ALLOC */
 			__u64 handle;
 		} free;
 
-		/* HL_MEM_OP_MAP - map device memory */
+		/**
+		 * structure for mapping device memory (used with the HL_MEM_OP_MAP op)
+		 * @hint_addr: requested virtual address of mapped memory.
+		 *             the driver will try to map the requested region to this hint
+		 *             address, as long as the address is valid and not already mapped.
+		 *             the user should check the returned address of the IOCTL to make
+		 *             sure he got the hint address.
+		 *             passing 0 here means that the driver will choose the address itself.
+		 * @handle: handle returned from HL_MEM_OP_ALLOC.
+		 */
 		struct {
-			/*
-			 * Requested virtual address of mapped memory.
-			 * The driver will try to map the requested region to
-			 * this hint address, as long as the address is valid
-			 * and not already mapped. The user should check the
-			 * returned address of the IOCTL to make sure he got
-			 * the hint address. Passing 0 here means that the
-			 * driver will choose the address itself.
-			 */
 			__u64 hint_addr;
-			/* Handle returned from HL_MEM_OP_ALLOC */
 			__u64 handle;
 		} map_device;
 
-		/* HL_MEM_OP_MAP - map host memory */
+		/**
+		 * structure for mapping host memory (used with the HL_MEM_OP_MAP op)
+		 * @host_virt_addr: address of allocated host memory.
+		 * @hint_addr: requested virtual address of mapped memory.
+		 *             the driver will try to map the requested region to this hint
+		 *             address, as long as the address is valid and not already mapped.
+		 *             the user should check the returned address of the IOCTL to make
+		 *             sure he got the hint address.
+		 *             passing 0 here means that the driver will choose the address itself.
+		 * @size: size of allocated host memory.
+		 */
 		struct {
-			/* Address of allocated host memory */
 			__u64 host_virt_addr;
-			/*
-			 * Requested virtual address of mapped memory.
-			 * The driver will try to map the requested region to
-			 * this hint address, as long as the address is valid
-			 * and not already mapped. The user should check the
-			 * returned address of the IOCTL to make sure he got
-			 * the hint address. Passing 0 here means that the
-			 * driver will choose the address itself.
-			 */
 			__u64 hint_addr;
-			/* Size of allocated host memory */
 			__u64 mem_size;
 		} map_host;
 
-		/* HL_MEM_OP_MAP_BLOCK - map a hw block */
+		/**
+		 * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op)
+		 * @block_addr:HW block address to map, a handle and size will be returned
+		 *             to the user and will be used to mmap the relevant block.
+		 *             only addresses from configuration space are allowed.
+		 */
 		struct {
-			/*
-			 * HW block address to map, a handle and size will be
-			 * returned to the user and will be used to mmap the
-			 * relevant block. Only addresses from configuration
-			 * space are allowed.
-			 */
 			__u64 block_addr;
 		} map_block;
 
-		/* HL_MEM_OP_UNMAP - unmap host memory */
+		/**
+		 * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op)
+		 * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP
+		 */
 		struct {
-			/* Virtual address returned from HL_MEM_OP_MAP */
 			__u64 device_virt_addr;
 		} unmap;
 
-		/* HL_MEM_OP_EXPORT_DMABUF_FD */
+		/**
+		 * structure for exporting DMABUF object (used with
+		 * the HL_MEM_OP_EXPORT_DMABUF_FD op)
+		 * @handle: handle returned from HL_MEM_OP_ALLOC.
+		 *          in Gaudi, where we don't have MMU for the device memory, the
+		 *          driver expects a physical address (instead of a handle) in the
+		 *          device memory space.
+		 * @mem_size: size of memory allocation. Relevant only for GAUDI
+		 */
 		struct {
-			/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
-			 * where we don't have MMU for the device memory, the
-			 * driver expects a physical address (instead of
-			 * a handle) in the device memory space.
-			 */
 			__u64 handle;
-			/* Size of memory allocation. Relevant only for GAUDI */
 			__u64 mem_size;
 		} export_dmabuf_fd;
 	};
 
-	/* HL_MEM_OP_* */
 	__u32 op;
-	/* HL_MEM_* flags.
-	 * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
-	 * DMA-BUF file/FD flags.
-	 */
 	__u32 flags;
-
-	/* Context ID - Currently not in use */
 	__u32 ctx_id;
-
-	/* number of timestamp elements
-	 * used only when HL_MEM_OP_TS_ALLOC opcode
-	 */
 	__u32 num_of_elements;
 };
 

From f23f280277d5a701de99c6652623b6bf8801c534 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Mon, 14 Feb 2022 15:22:00 +0200
Subject: [PATCH 43/50] habanalabs: allow user to set allocation page size

In future ASICs the MMU will be able to work with multiple page sizes,
thus a new flag is added to allow the user to set the requested page
size.

This flag is added since the whole DRAM is allocated for the user and
the user also should be familiar with the memory usage use case.

As such, the user may choose to "over allocate" memory in favor of
performance (for instance, large page allocations cover more memory
with fewer TLB entries).

For example: say available page sizes are of 1MB and 32MB. If user
wants to allocate 40MB the user can either set page size to 1MB and
allocate the exact amount of memory (but will result in 40 TLB entries)
or the user can use 32MB pages, "waste" 8MB of physical memory but
occupy only 2 TLB entries.

Note that this feature will be available only to ASICs that support
multiple DRAM page sizes.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  5 ++++
 drivers/misc/habanalabs/common/memory.c     | 31 ++++++++++++++++++++-
 drivers/misc/habanalabs/gaudi/gaudi.c       |  3 +-
 drivers/misc/habanalabs/goya/goya.c         |  1 +
 include/uapi/misc/habanalabs.h              |  5 +++-
 5 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index b5055ab528b8..677ae4ff922c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -563,6 +563,7 @@ struct hl_hints_range {
  *                              devices)
  * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
  * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
+ * @supports_user_set_page_size: true if user can set the allocation page size.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -648,6 +649,7 @@ struct asic_fixed_properties {
 	u8				allow_inference_soft_reset;
 	u8				configurable_stop_on_err;
 	u8				set_max_power_on_device_init;
+	u8				supports_user_set_page_size;
 };
 
 /**
@@ -1311,6 +1313,8 @@ struct fw_load_mgr {
  * @get_sob_addr: get SOB base address offset.
  * @set_pci_memory_regions: setting properties of PCI memory regions
  * @get_stream_master_qid_arr: get pointer to stream masters QID array
+ * @is_valid_dram_page_size: return true if page size is supported in device
+ *                           memory allocation, otherwise false.
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1436,6 +1440,7 @@ struct hl_asic_funcs {
 	u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
 	void (*set_pci_memory_regions)(struct hl_device *hdev);
 	u32* (*get_stream_master_qid_arr)(void);
+	bool (*is_valid_dram_page_size)(u32 page_size);
 };
 
 
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 7bd4cae87a54..385bf3448c73 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -24,6 +24,31 @@ MODULE_IMPORT_NS(DMA_BUF);
 static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
 			struct hl_mem_in *args, u64 *handle);
 
+static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 psize;
+
+	/*
+	 * for ASIC that supports setting the allocation page size by user we will address
+	 * user's choice only if it is not 0 (as 0 means taking the default page size)
+	 */
+	if (prop->supports_user_set_page_size && args->alloc.page_size) {
+		psize = args->alloc.page_size;
+
+		if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) {
+			dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize);
+			return -EINVAL;
+		}
+	} else {
+		psize = hdev->asic_prop.dram_page_size;
+	}
+
+	*page_size = psize;
+
+	return 0;
+}
+
 /*
  * The va ranges in context object contain a list with the available chunks of
  * device virtual memory.
@@ -69,7 +94,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	bool contiguous;
 
 	num_curr_pgs = 0;
-	page_size = hdev->asic_prop.dram_page_size;
+
+	rc = set_alloc_page_size(hdev, args, &page_size);
+	if (rc)
+		return rc;
+
 	num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
 	total_size = num_pgs * page_size;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index ad640a268a3c..a60c0e23d23c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9432,7 +9432,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.state_dump_init = gaudi_state_dump_init,
 	.get_sob_addr = gaudi_get_sob_addr,
 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
-	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
+	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
+	.is_valid_dram_page_size = NULL
 };
 
 /**
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c08d96e43c9f..ec9358bcbf0b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5764,6 +5764,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.get_sob_addr = &goya_get_sob_addr,
 	.set_pci_memory_regions = goya_set_pci_memory_regions,
 	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
+	.is_valid_dram_page_size = NULL
 };
 
 /*
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a2a953a91b5e..1d6b4f0c4159 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -1130,9 +1130,12 @@ struct hl_mem_in {
 		/**
 		 * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op)
 		 * @mem_size: memory size to allocate
+		 * @page_size: page size to use on allocation. when the value is 0 the default page
+		 *             size will be taken.
 		 */
 		struct {
 			__u64 mem_size;
+			__u64 page_size;
 		} alloc;
 
 		/**

From 26ef1c000bc21a192618c9ec651dd36ba63ca00c Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 17 Feb 2022 16:07:03 +0200
Subject: [PATCH 44/50] habanalabs/gaudi: handle axi errors from NIC engines

Various AXI errors can occur in the NIC engines and are reported to
the driver by the f/w. Add code to print the errors and ack them to
the f/w.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 48 +++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index a60c0e23d23c..95201d995743 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7665,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
 		fw_alive->thread_id, fw_alive->uptime_seconds);
 }
 
+static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
+						void *data)
+{
+	char desc[64] = "", *type;
+	struct eq_nic_sei_event *eq_nic_sei = data;
+	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
+
+	switch (eq_nic_sei->axi_error_cause) {
+	case RXB:
+		type = "RXB";
+		break;
+	case RXE:
+		type = "RXE";
+		break;
+	case TXS:
+		type = "TXS";
+		break;
+	case TXE:
+		type = "TXE";
+		break;
+	case QPC_RESP:
+		type = "QPC_RESP";
+		break;
+	case NON_AXI_ERR:
+		type = "NON_AXI_ERR";
+		break;
+	case TMR:
+		type = "TMR";
+		break;
+	default:
+		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
+			eq_nic_sei->axi_error_cause);
+		type = "N/A";
+		break;
+	}
+
+	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
+			eq_nic_sei->id);
+	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
+		event_type, desc);
+}
+
 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
 {
 	/* GAUDI doesn't support any reset except hard-reset */
@@ -7898,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 				struct hl_eq_entry *eq_entry)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
+	u64 data = le64_to_cpu(eq_entry->data[0]);
 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
 	u32 fw_fatal_err_flag = 0;
 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
@@ -8095,6 +8138,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		hl_fw_unmask_irq(hdev, event_type);
 		break;
 
+	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
+		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
+		hl_fw_unmask_irq(hdev, event_type);
+		break;
+
 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
 		gaudi_print_irq_info(hdev, event_type, false);
 		gaudi_print_sm_sei_info(hdev, event_type,

From 100fcf1e111de3e8433777f305b042267c5ad79f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 17 Feb 2022 16:08:09 +0200
Subject: [PATCH 45/50] habanalabs/gaudi: add missing handling of NIC related
 events

There are a few events that can arrive from the f/w and without proper
handling can cause errors to appear in the kernel log without reason.

Add the relevant handling that was missing.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c                  |  5 +++++
 .../misc/habanalabs/include/gaudi/gaudi_async_events.h | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 95201d995743..0eed0efae040 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7977,6 +7977,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	case GAUDI_EVENT_PSOC_MEM_DERR:
 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
+	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
 	case GAUDI_EVENT_MMU_DERR:
@@ -8077,6 +8078,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	case GAUDI_EVENT_PSOC_MEM_SERR:
 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
+	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
 		fallthrough;
@@ -8154,6 +8156,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		hl_fw_unmask_irq(hdev, event_type);
 		break;
 
+	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
+		break;
+
 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
 		gaudi_print_clk_change_info(hdev, event_type);
 		hl_fw_unmask_irq(hdev, event_type);
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index d966bd4dfea6..c07ed4ed304c 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -311,6 +311,16 @@ enum gaudi_async_event_id {
 	GAUDI_EVENT_FW_ALIVE_S = 645,
 	GAUDI_EVENT_DEV_RESET_REQ = 646,
 	GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
+	GAUDI_EVENT_STATUS_NIC0_ENG0 = 648,
+	GAUDI_EVENT_STATUS_NIC0_ENG1 = 649,
+	GAUDI_EVENT_STATUS_NIC1_ENG0 = 650,
+	GAUDI_EVENT_STATUS_NIC1_ENG1 = 651,
+	GAUDI_EVENT_STATUS_NIC2_ENG0 = 652,
+	GAUDI_EVENT_STATUS_NIC2_ENG1 = 653,
+	GAUDI_EVENT_STATUS_NIC3_ENG0 = 654,
+	GAUDI_EVENT_STATUS_NIC3_ENG1 = 655,
+	GAUDI_EVENT_STATUS_NIC4_ENG0 = 656,
+	GAUDI_EVENT_STATUS_NIC4_ENG1 = 657,
 	GAUDI_EVENT_FIX_POWER_ENV_S = 658,
 	GAUDI_EVENT_FIX_POWER_ENV_E = 659,
 	GAUDI_EVENT_FIX_THERMAL_ENV_S = 660,

From e8458e20e0a3c426ed5ed3ce590c05718c8b8e8e Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 6 Feb 2022 08:50:43 +0200
Subject: [PATCH 46/50] habanalabs: make sure device mem alloc is page aligned

Working with MMU that supports multiple page sizes requires that mapping
of a page of a certain size will be aligned to the same size (e.g. the
physical address of 32MB page shall be aligned to 32MB).

To achieve this the gen_pool allocation is now using the "align" variant
to comply with the alignment requirements.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 385bf3448c73..e008d82e4ba3 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -90,8 +90,8 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	u64 paddr = 0, total_size, num_pgs, i;
 	u32 num_curr_pgs, page_size;
-	int handle, rc;
 	bool contiguous;
+	int handle, rc;
 
 	num_curr_pgs = 0;
 
@@ -110,7 +110,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	contiguous = args->flags & HL_MEM_CONTIGUOUS;
 
 	if (contiguous) {
-		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+		if (is_power_of_2(page_size))
+			paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
+								total_size, NULL, page_size);
+		else
+			paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
 		if (!paddr) {
 			dev_err(hdev->dev,
 				"failed to allocate %llu contiguous pages with total size of %llu\n",
@@ -144,9 +148,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 			phys_pg_pack->pages[i] = paddr + i * page_size;
 	} else {
 		for (i = 0 ; i < num_pgs ; i++) {
-			phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
-							vm->dram_pg_pool,
-							page_size);
+			if (is_power_of_2(page_size))
+				phys_pg_pack->pages[i] =
+						(u64) gen_pool_dma_alloc_align(vm->dram_pg_pool,
+										page_size, NULL,
+										page_size);
+			else
+				phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool,
+										page_size);
 			if (!phys_pg_pack->pages[i]) {
 				dev_err(hdev->dev,
 					"Failed to allocate device memory (out of memory)\n");

From a78b07dcae2f9d6fafadb05540d8152f575d7e59 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Thu, 17 Feb 2022 16:15:26 +0200
Subject: [PATCH 47/50] habanalabs: Fix reset upon device release bug

If the user application is interrupted while some CS is still in flight
or in the middle of completion handling in the driver, the last refcount
of the kernel private data for the user process will be put not in the
fd close flow but in the CS completion workqueue context.

This means that the device reset-upon-device-release will be called
from that context. During the reset flow, the driver flushes all the CS
workqueues to ensure that any scheduled work has run to completion,
and since we are running from the completion context we will have a
deadlock.

Therefore, we need to skip flushing the workqueue in those cases.
It is safe to do it because the user won't be able to release the device
unless the workqueues are already empty.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c | 17 ++++++++++-------
 drivers/misc/habanalabs/common/device.c         | 13 ++++++++-----
 drivers/misc/habanalabs/common/habanalabs.h     |  2 +-
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index c7757c78d0b1..d93ef9f1c45c 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -921,18 +921,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 		complete_job(hdev, job);
 }
 
-void hl_cs_rollback_all(struct hl_device *hdev)
+void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
 {
 	int i;
 	struct hl_cs *cs, *tmp;
 
-	flush_workqueue(hdev->ts_free_obj_wq);
+	if (!skip_wq_flush) {
+		flush_workqueue(hdev->ts_free_obj_wq);
 
-	/* flush all completions before iterating over the CS mirror list in
-	 * order to avoid a race with the release functions
-	 */
-	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-		flush_workqueue(hdev->cq_wq[i]);
+		/* flush all completions before iterating over the CS mirror list in
+		 * order to avoid a race with the release functions
+		 */
+		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+			flush_workqueue(hdev->cq_wq[i]);
+
+	}
 
 	/* Make sure we don't have leftovers in the CS mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 8ea9dfe3f79b..d52381d1fbd2 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -685,7 +685,8 @@ static void take_release_locks(struct hl_device *hdev)
 	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
 }
 
-static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
+				bool skip_wq_flush)
 {
 	if (hard_reset)
 		device_late_fini(hdev);
@@ -698,7 +699,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
 	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
 
 	/* Go over all the queues, release all CS and their jobs */
-	hl_cs_rollback_all(hdev);
+	hl_cs_rollback_all(hdev, skip_wq_flush);
 
 	/* Release all pending user interrupts, each pending user interrupt
 	 * holds a reference to user context
@@ -978,7 +979,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
-			reset_upon_device_release = false, schedule_hard_reset = false;
+			reset_upon_device_release = false, schedule_hard_reset = false,
+			skip_wq_flush = false;
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	struct hl_ctx *ctx;
 	int i, rc;
@@ -991,6 +993,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	hard_reset = !!(flags & HL_DRV_RESET_HARD);
 	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
 	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
+	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
 
 	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
 		hard_instead_soft = true;
@@ -1076,7 +1079,7 @@ again:
 		return 0;
 	}
 
-	cleanup_resources(hdev, hard_reset, fw_reset);
+	cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
 
 kill_processes:
 	if (hard_reset) {
@@ -1686,7 +1689,7 @@ void hl_device_fini(struct hl_device *hdev)
 
 	hl_hwmon_fini(hdev);
 
-	cleanup_resources(hdev, true, false);
+	cleanup_resources(hdev, true, false, false);
 
 	/* Kill processes here after CS rollback. This is because the process
 	 * can't really exit until all its CSs are done, which is what we
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 677ae4ff922c..cef4717d0916 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3054,7 +3054,7 @@ int hl_cb_pool_fini(struct hl_device *hdev);
 int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
-void hl_cs_rollback_all(struct hl_device *hdev);
+void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);

From 9c27896ac1bb83ea5c461ce6f7089d02102a2b21 Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Thu, 24 Feb 2022 14:58:05 +0800
Subject: [PATCH 48/50] habanalabs: Add check for pci_enable_device

pci_enable_device() can fail, so check its return value
and return the error code to the caller if the call
fails.

Fixes: 70b2f993ea4a ("habanalabs: create common folder")
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/debugfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 9f0aaf0ef43b..f18495545854 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
 		pci_set_power_state(hdev->pdev, PCI_D0);
 		pci_restore_state(hdev->pdev);
 		rc = pci_enable_device(hdev->pdev);
+		if (rc < 0)
+			return rc;
 	} else if (value == 2) {
 		pci_save_state(hdev->pdev);
 		pci_disable_device(hdev->pdev);

From b0106bc6fe5a99490f2a2407025db6f2b514b6dc Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 23 Feb 2022 14:46:37 +0200
Subject: [PATCH 49/50] habanalabs: add an option to delay a device reset

Several H/W events can be sent adjacently, even due to a single error.
If a hard-reset is triggered as part of handling one of these events,
the following events won't be handled.
The debug info from these missed events is important, sometimes even
more important than the one that was handled.

To allow handling these close events, add an option to delay a device
reset and use it when resetting due to H/W events.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 8 +++++++-
 drivers/misc/habanalabs/common/habanalabs.h | 4 ++++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index d52381d1fbd2..dc9341a64541 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -13,6 +13,8 @@
 #include <linux/pci.h>
 #include <linux/hwmon.h>
 
+#define HL_RESET_DELAY_USEC		10000	/* 10ms */
+
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
@@ -980,7 +982,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
 			reset_upon_device_release = false, schedule_hard_reset = false,
-			skip_wq_flush = false;
+			skip_wq_flush, delay_reset;
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	struct hl_ctx *ctx;
 	int i, rc;
@@ -994,6 +996,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
 	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
 	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
+	delay_reset = !!(flags & HL_DRV_RESET_DELAY);
 
 	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
 		hard_instead_soft = true;
@@ -1043,6 +1046,9 @@ do_reset:
 		hdev->reset_info.in_reset = 1;
 		spin_unlock(&hdev->reset_info.lock);
 
+		if (delay_reset)
+			usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
+
 		handle_reset_trigger(hdev, flags);
 
 		/* This still allows the completion of some KDMA ops */
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index cef4717d0916..1edaf6ab67bd 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -142,6 +142,9 @@ enum hl_mmu_page_table_location {
  *
  * - HL_DRV_RESET_FW_FATAL_ERR
  *       Set if reset is due to a fatal error from FW
+ *
+ * - HL_DRV_RESET_DELAY
+ *       Set if a delay should be added before the reset
  */
 
 #define HL_DRV_RESET_HARD		(1 << 0)
@@ -151,6 +154,7 @@ enum hl_mmu_page_table_location {
 #define HL_DRV_RESET_DEV_RELEASE	(1 << 4)
 #define HL_DRV_RESET_BYPASS_REQ_TO_FW	(1 << 5)
 #define HL_DRV_RESET_FW_FATAL_ERR	(1 << 6)
+#define HL_DRV_RESET_DELAY		(1 << 7)
 
 #define HL_MAX_SOBS_PER_MONITOR	8
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 0eed0efae040..21c2b678ff72 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -8199,7 +8199,7 @@ reset_device:
 					| HL_DRV_RESET_BYPASS_REQ_TO_FW
 					| fw_fatal_err_flag);
 	else if (hdev->hard_reset_on_fw_events)
-		hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
+		hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag);
 	else
 		hl_fw_unmask_irq(hdev, event_type);
 }

From 655221c56739058ae73a7aabd90847a024dabff6 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 23 Feb 2022 13:03:56 +0200
Subject: [PATCH 50/50] habanalabs: remove deprecated firmware states

During driver and F/W handshake, driver waits for F/W to reach
certain states in order to progress with the boot flow.
Some of the states were deprecated a long time ago and were never
present on official firmwares. Therefore, let's remove them from
the handshake process.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 11957d36c6a9..3262126cc7ca 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1207,8 +1207,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
 		hdev,
 		cpu_boot_status_reg,
 		status,
-		(status == CPU_BOOT_STATUS_IN_UBOOT) ||
-		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
 		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
 		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),