habanalabs: fixes to the poll-timeout macros

- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- on register poll using ELBI use ELBI read rather than BAR read on
  error condition
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ohad Sharabi 2022-07-04 08:33:57 +03:00 committed by Oded Gabbay
parent 3fc252670b
commit 20cd88a775

View File

@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/* Timeout should be longer when working with simulator but cap the
* increased timeout to some maximum
*/
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
({ \
ktime_t __timeout; \
u32 __elbi_read; \
int __rc = 0; \
if (hdev->pdev) \
__timeout = ktime_add_us(ktime_get(), timeout_us); \
else \
@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(u64) HL_SIM_MAX_TIMEOUT_US)); \
might_sleep_if(sleep_us); \
for (;;) { \
(val) = RREG32(addr); \
if (elbi) { \
__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
if (__rc) \
break; \
(val) = __elbi_read; \
} else {\
(val) = RREG32((u32)addr); \
} \
if (cond) \
break; \
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
(val) = RREG32(addr); \
if (elbi) { \
__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
if (__rc) \
break; \
(val) = __elbi_read; \
} else {\
(val) = RREG32((u32)addr); \
} \
break; \
} \
if (sleep_us) \
usleep_range((sleep_us >> 2) + 1, sleep_us); \
} \
(cond) ? 0 : -ETIMEDOUT; \
__rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
})
/*
 * Poll a register until cond is satisfied or timeout_us expires.
 * Forwards to hl_poll_timeout_common() with elbi set to false, so the
 * register is read with RREG32(). Evaluates to 0 when cond holds and to
 * -ETIMEDOUT otherwise.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false)
/*
 * Poll a register until cond is satisfied or timeout_us expires.
 * Forwards to hl_poll_timeout_common() with elbi set to true, so the
 * register is read with hl_pci_elbi_read(). Evaluates to the error code of
 * hl_pci_elbi_read() if a read fails, to 0 when cond holds and to
 * -ETIMEDOUT otherwise.
 */
#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true)
/*
 * Poll an array of register addresses until every register reads back
 * expected_val.
 *
 * Once a register in the array satisfies the condition it is not polled
 * again; this is done both for efficiency and because some registers are
 * "clear on read". Progress is tracked in a 64-bit mask, one bit per array
 * element, so arr_size must be smaller than 64.
 *
 * Registers are read with hl_pci_elbi_read() when elbi is true and with
 * RREG32() otherwise. When hdev->pdev is not set, the timeout is multiplied
 * by 10 and capped at HL_SIM_MAX_TIMEOUT_US.
 *
 * Evaluates to:
 *   0           - all registers matched expected_val
 *   -EINVAL     - arr_size is 64 or more (nothing is polled)
 *   -ETIMEDOUT  - timeout_us elapsed before all registers matched
 *   other       - error code returned by a failing hl_pci_elbi_read()
 *
 * A timeout_us of 0 disables the time limit. The macro arguments are
 * evaluated more than once, so they must be free of side effects.
 *
 * TODO: use read from PCI bar in other places in the code (SW-91406)
 */
#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
						timeout_us, elbi) \
({ \
	ktime_t __timeout; \
	/* bit N set means element N has not matched yet */ \
	u64 __elem_bitmask = 0; \
	u32 __read_val; \
	u8 __arr_idx; \
	int __rc = 0; \
	\
	if ((hdev)->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(), \
				min(((u64)(timeout_us) * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	\
	might_sleep_if(sleep_us); \
	/* BIT_ULL(64) would shift past the mask width, so reject it up front */ \
	if ((arr_size) >= 64) \
		__rc = -EINVAL; \
	else \
		__elem_bitmask = BIT_ULL(arr_size) - 1; \
	while (!__rc) { \
		for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) { \
			/* skip elements that already matched */ \
			if (!(__elem_bitmask & BIT_ULL(__arr_idx))) \
				continue; \
			if (elbi) { \
				__rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], \
							&__read_val); \
				if (__rc) \
					break; \
			} else { \
				__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
			} \
			if (__read_val == (expected_val)) \
				__elem_bitmask &= ~BIT_ULL(__arr_idx); \
		} \
		/* the inner break only leaves the for loop; leave the poll loop too */ \
		if (__rc || (__elem_bitmask == 0)) \
			break; \
		if ((timeout_us) && ktime_compare(ktime_get(), __timeout) > 0) \
			break; \
		if (sleep_us) \
			usleep_range(((sleep_us) >> 2) + 1, sleep_us); \
	} \
	__rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
})
#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
timeout_us) \
hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
timeout_us, false)
#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
timeout_us) \
hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
timeout_us, true)
/*
* address in this macro points always to a memory location in the
* host's (server's) memory. That location is updated asynchronously
@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(cond) ? 0 : -ETIMEDOUT; \
})
/*
 * Poll the location at addr with readl() until cond is satisfied or
 * timeout_us expires. The last value read is left in val.
 *
 * When hdev->pdev is not set, the timeout is multiplied by 10 and capped at
 * HL_SIM_MAX_TIMEOUT_US. timeout_us is widened to u64 before the
 * multiplication so that a large 32-bit timeout cannot wrap around before
 * the cap is applied.
 *
 * Evaluates to 0 when cond holds and to -ETIMEDOUT otherwise. A timeout_us
 * of 0 disables the time limit. The macro arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
					timeout_us) \
({ \
	ktime_t __timeout; \
	if ((hdev)->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min(((u64)(timeout_us) * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = readl(addr); \
		if (cond) \
			break; \
		if ((timeout_us) && ktime_compare(ktime_get(), __timeout) > 0) { \
			/* read once more so a late wakeup is not reported as a timeout */ \
			(val) = readl(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range(((sleep_us) >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
#define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
({ \
struct user_mapped_block *p = blk; \